Repository: official-stockfish/Stockfish Branch: master Commit: d173a0655d04 Files: 100 Total size: 1017.5 KB Directory structure: gitextract_iuycd67k/ ├── .clang-format ├── .git-blame-ignore-revs ├── .github/ │ ├── ISSUE_TEMPLATE/ │ │ ├── BUG-REPORT.yml │ │ └── config.yml │ ├── ci/ │ │ ├── arm_matrix.json │ │ ├── libcxx17.imp │ │ └── matrix.json │ └── workflows/ │ ├── arm_compilation.yml │ ├── avx2_compilers.yml │ ├── clang-format.yml │ ├── codeql.yml │ ├── compilation.yml │ ├── games.yml │ ├── iwyu.yml │ ├── matetrack.yml │ ├── sanitizers.yml │ ├── stockfish.yml │ ├── tests.yml │ └── upload_binaries.yml ├── .gitignore ├── AUTHORS ├── CITATION.cff ├── CONTRIBUTING.md ├── Copying.txt ├── README.md ├── Top CPU Contributors.txt ├── scripts/ │ ├── .gitattributes │ ├── get_native_properties.sh │ └── net.sh ├── src/ │ ├── Makefile │ ├── benchmark.cpp │ ├── benchmark.h │ ├── bitboard.cpp │ ├── bitboard.h │ ├── engine.cpp │ ├── engine.h │ ├── evaluate.cpp │ ├── evaluate.h │ ├── history.h │ ├── incbin/ │ │ ├── UNLICENCE │ │ └── incbin.h │ ├── main.cpp │ ├── memory.cpp │ ├── memory.h │ ├── misc.cpp │ ├── misc.h │ ├── movegen.cpp │ ├── movegen.h │ ├── movepick.cpp │ ├── movepick.h │ ├── nnue/ │ │ ├── features/ │ │ │ ├── full_threats.cpp │ │ │ ├── full_threats.h │ │ │ ├── half_ka_v2_hm.cpp │ │ │ └── half_ka_v2_hm.h │ │ ├── layers/ │ │ │ ├── affine_transform.h │ │ │ ├── affine_transform_sparse_input.h │ │ │ ├── clipped_relu.h │ │ │ └── sqr_clipped_relu.h │ │ ├── network.cpp │ │ ├── network.h │ │ ├── nnue_accumulator.cpp │ │ ├── nnue_accumulator.h │ │ ├── nnue_architecture.h │ │ ├── nnue_common.h │ │ ├── nnue_feature_transformer.h │ │ ├── nnue_misc.cpp │ │ ├── nnue_misc.h │ │ └── simd.h │ ├── numa.h │ ├── perft.h │ ├── position.cpp │ ├── position.h │ ├── score.cpp │ ├── score.h │ ├── search.cpp │ ├── search.h │ ├── shm.h │ ├── shm_linux.h │ ├── syzygy/ │ │ ├── tbprobe.cpp │ │ └── tbprobe.h │ ├── thread.cpp │ ├── thread.h │ ├── thread_win32_osx.h │ ├── timeman.cpp │ ├── 
timeman.h │ ├── tt.cpp │ ├── tt.h │ ├── tune.cpp │ ├── tune.h │ ├── types.h │ ├── uci.cpp │ ├── uci.h │ ├── ucioption.cpp │ └── ucioption.h └── tests/ ├── .gitattributes ├── instrumented.py ├── perft.sh ├── reprosearch.sh ├── signature.sh └── testing.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .clang-format ================================================ AccessModifierOffset: -1 AlignAfterOpenBracket: Align AlignConsecutiveAssignments: Consecutive AlignConsecutiveDeclarations: Consecutive AlignEscapedNewlines: DontAlign AlignOperands: AlignAfterOperator AlignTrailingComments: true AllowAllParametersOfDeclarationOnNextLine: true AllowShortCaseLabelsOnASingleLine: false AllowShortEnumsOnASingleLine: false AllowShortIfStatementsOnASingleLine: false BreakTemplateDeclarations: Yes BasedOnStyle: WebKit BitFieldColonSpacing: After BinPackParameters: false BreakBeforeBinaryOperators: NonAssignment BreakBeforeBraces: Custom BraceWrapping: AfterFunction: false AfterClass: false AfterControlStatement: true BeforeElse: true BreakBeforeTernaryOperators: true BreakConstructorInitializers: AfterColon BreakStringLiterals: false ColumnLimit: 100 ContinuationIndentWidth: 2 Cpp11BracedListStyle: true IndentGotoLabels: false IndentPPDirectives: BeforeHash IndentWidth: 4 MaxEmptyLinesToKeep: 2 NamespaceIndentation: None PackConstructorInitializers: Never ReflowComments: false SortIncludes: false SortUsingDeclarations: false SpaceAfterCStyleCast: true SpaceAfterTemplateKeyword: false SpaceBeforeCaseColon: true SpaceBeforeCpp11BracedList: false SpaceBeforeInheritanceColon: false SpaceInEmptyBlock: false SpacesBeforeTrailingComments: 2 ================================================ FILE: .git-blame-ignore-revs ================================================ # .git-blame-ignore-revs # Ignore commit which added clang-format 
2d0237db3f0e596fb06e3ffbadba84dcc4e018f6 # Post commit formatting fixes 0fca5605fa2e5e7240fde5e1aae50952b2612231 08ed4c90db31959521b7ef3186c026edd1e90307 ================================================ FILE: .github/ISSUE_TEMPLATE/BUG-REPORT.yml ================================================ name: Report issue description: Create a report to help us fix issues with the engine body: - type: textarea attributes: label: Describe the issue description: A clear and concise description of what you're experiencing. validations: required: true - type: textarea attributes: label: Expected behavior description: A clear and concise description of what you expected to happen. validations: required: true - type: textarea attributes: label: Steps to reproduce description: | Steps to reproduce the behavior. You can also use this section to paste the command line output. placeholder: | ``` position startpos moves g2g4 e7e5 f2f3 go mate 1 info string NNUE evaluation using nn-6877cd24400e.nnue enabled info depth 1 seldepth 1 multipv 1 score mate 1 nodes 33 nps 11000 tbhits 0 time 3 pv d8h4 bestmove d8h4 ``` validations: required: true - type: textarea attributes: label: Anything else? description: | Anything that will give us more context about the issue you are encountering. You can also use this section to propose ideas on how to solve the issue. validations: required: false - type: dropdown attributes: label: Operating system options: - All - Windows - Linux - MacOS - Android - Other or N/A validations: required: true - type: input attributes: label: Stockfish version description: | This can be found by running the engine. You can also use the commit ID. 
placeholder: Stockfish 15 / e6e324e validations: required: true ================================================ FILE: .github/ISSUE_TEMPLATE/config.yml ================================================ blank_issues_enabled: false contact_links: - name: Discord server url: https://discord.gg/GWDRS3kU6R about: Feel free to ask for support or have a chat with us on our Discord server! - name: Discussions, Q&A, ideas, show us something... url: https://github.com/official-stockfish/Stockfish/discussions/new about: Do you have an idea for Stockfish? Do you want to show something that you made? Please open a discussion about it! ================================================ FILE: .github/ci/arm_matrix.json ================================================ { "config": [ { "name": "Android NDK aarch64", "os": "ubuntu-22.04", "simple_name": "android", "compiler": "aarch64-linux-android29-clang++", "emu": "qemu-aarch64", "comp": "ndk", "shell": "bash", "archive_ext": "tar" }, { "name": "Android NDK arm", "os": "ubuntu-22.04", "simple_name": "android", "compiler": "armv7a-linux-androideabi29-clang++", "emu": "qemu-arm", "comp": "ndk", "shell": "bash", "archive_ext": "tar" } ], "binaries": ["armv8-dotprod", "armv8", "armv7", "armv7-neon"], "exclude": [ { "binaries": "armv8-dotprod", "config": { "compiler": "armv7a-linux-androideabi29-clang++" } }, { "binaries": "armv8", "config": { "compiler": "armv7a-linux-androideabi29-clang++" } }, { "binaries": "armv7", "config": { "compiler": "aarch64-linux-android29-clang++" } }, { "binaries": "armv7-neon", "config": { "compiler": "aarch64-linux-android29-clang++" } } ] } ================================================ FILE: .github/ci/libcxx17.imp ================================================ [ # Mappings for libcxx's internal headers { include: [ "<__fwd/fstream.h>", private, "<iosfwd>", public ] }, { include: [ "<__fwd/ios.h>", private, "<iosfwd>", public ] }, { include: [ "<__fwd/istream.h>", private, "<iosfwd>", public ] }, { include: [
"<__fwd/ostream.h>", private, "<iosfwd>", public ] }, { include: [ "<__fwd/sstream.h>", private, "<iosfwd>", public ] }, { include: [ "<__fwd/streambuf.h>", private, "<iosfwd>", public ] }, { include: [ "<__fwd/string_view.h>", private, "<string_view>", public ] }, { include: [ "<__system_error/errc.h>", private, "<system_error>", public ] }, # Mappings for includes between public headers { include: [ "<ios>", public, "<iostream>", public ] }, { include: [ "<streambuf>", public, "<iostream>", public ] }, { include: [ "<istream>", public, "<iostream>", public ] }, { include: [ "<ostream>", public, "<iostream>", public ] }, { include: [ "<iosfwd>", public, "<iostream>", public ] }, # Missing mappings in include-what-you-use's libcxx.imp { include: ["@<__condition_variable/.*>", private, "<condition_variable>", public ] }, { include: ["@<__mutex/.*>", private, "<mutex>", public ] }, ] ================================================ FILE: .github/ci/matrix.json ================================================ { "config": [ { "name": "Ubuntu 22.04 GCC", "os": "ubuntu-22.04", "simple_name": "ubuntu", "compiler": "g++", "comp": "gcc", "shell": "bash", "archive_ext": "tar", "sde": "/home/runner/work/Stockfish/Stockfish/.output/sde-temp-files/sde-external-9.33.0-2024-01-07-lin/sde -future --" }, { "name": "macOS 15 Apple Clang", "os": "macos-15-intel", "simple_name": "macos", "compiler": "clang++", "comp": "clang", "shell": "bash", "archive_ext": "tar" }, { "name": "macOS 15 Apple Clang M1", "os": "macos-15", "simple_name": "macos-m1", "compiler": "clang++", "comp": "clang", "shell": "bash", "archive_ext": "tar" }, { "name": "Windows 2022 Mingw-w64 GCC x86_64", "os": "windows-2022", "simple_name": "windows", "compiler": "g++", "comp": "mingw", "msys_sys": "mingw64", "msys_env": "x86_64-gcc", "shell": "msys2 {0}", "ext": ".exe", "sde": "/d/a/Stockfish/Stockfish/.output/sde-temp-files/sde-external-9.33.0-2024-01-07-win/sde.exe -future --", "archive_ext": "zip" }, { "name": "Windows 11 Mingw-w64 Clang arm64", "os": "windows-11-arm", "simple_name": "windows", "compiler": "clang++", "comp": "clang", "msys_sys": "clangarm64", "msys_env":
"clang-aarch64-clang", "shell": "msys2 {0}", "ext": ".exe", "archive_ext": "zip" } ], "binaries": [ "x86-64", "x86-64-sse41-popcnt", "x86-64-avx2", "x86-64-bmi2", "x86-64-avxvnni", "x86-64-avx512", "x86-64-vnni512", "x86-64-avx512icl", "apple-silicon", "armv8", "armv8-dotprod" ], "exclude": [ { "binaries": "x86-64", "config": { "os": "macos-15" } }, { "binaries": "x86-64-sse41-popcnt", "config": { "os": "macos-15" } }, { "binaries": "x86-64-avx2", "config": { "os": "macos-15" } }, { "binaries": "x86-64-bmi2", "config": { "os": "macos-15" } }, { "binaries": "x86-64-avxvnni", "config": { "os": "macos-15" } }, { "binaries": "x86-64-avx512", "config": { "os": "macos-15" } }, { "binaries": "x86-64-vnni512", "config": { "os": "macos-15" } }, { "binaries": "x86-64-avx512icl", "config": { "os": "macos-15" } }, { "binaries": "x86-64-avxvnni", "config": { "os": "macos-15-intel" } }, { "binaries": "x86-64-avx512", "config": { "os": "macos-15-intel" } }, { "binaries": "x86-64-vnni512", "config": { "os": "macos-15-intel" } }, { "binaries": "x86-64-avx512icl", "config": { "os": "macos-15-intel" } }, { "binaries": "x86-64", "config": { "os": "windows-11-arm" } }, { "binaries": "x86-64-sse41-popcnt", "config": { "os": "windows-11-arm" } }, { "binaries": "x86-64-avx2", "config": { "os": "windows-11-arm" } }, { "binaries": "x86-64-bmi2", "config": { "os": "windows-11-arm" } }, { "binaries": "x86-64-avxvnni", "config": { "os": "windows-11-arm" } }, { "binaries": "x86-64-avx512", "config": { "os": "windows-11-arm" } }, { "binaries": "x86-64-vnni512", "config": { "os": "windows-11-arm" } }, { "binaries": "x86-64-avx512icl", "config": { "os": "windows-11-arm" } }, { "binaries": "apple-silicon", "config": { "os": "windows-2022" } }, { "binaries": "apple-silicon", "config": { "os": "windows-11-arm" } }, { "binaries": "apple-silicon", "config": { "os": "ubuntu-20.04" } }, { "binaries": "apple-silicon", "config": { "os": "ubuntu-22.04" } }, { "binaries": "apple-silicon", "config": { "os": 
"macos-15-intel" } }, { "binaries": "armv8", "config": { "os": "windows-2022" } }, { "binaries": "armv8", "config": { "os": "ubuntu-20.04" } }, { "binaries": "armv8", "config": { "os": "ubuntu-22.04" } }, { "binaries": "armv8", "config": { "os": "macos-15-intel" } }, { "binaries": "armv8", "config": { "os": "macos-15" } }, { "binaries": "armv8-dotprod", "config": { "os": "windows-2022" } }, { "binaries": "armv8-dotprod", "config": { "os": "ubuntu-20.04" } }, { "binaries": "armv8-dotprod", "config": { "os": "ubuntu-22.04" } }, { "binaries": "armv8-dotprod", "config": { "os": "macos-15-intel" } }, { "binaries": "armv8-dotprod", "config": { "os": "macos-15" } } ] } ================================================ FILE: .github/workflows/arm_compilation.yml ================================================ name: Compilation on: workflow_call: inputs: matrix: type: string required: true jobs: Compilation: name: ${{ matrix.config.name }} ${{ matrix.binaries }} runs-on: ${{ matrix.config.os }} env: COMPCXX: ${{ matrix.config.compiler }} COMP: ${{ matrix.config.comp }} EMU: ${{ matrix.config.emu }} EXT: ${{ matrix.config.ext }} BINARY: ${{ matrix.binaries }} strategy: fail-fast: false matrix: ${{ fromJson(inputs.matrix) }} defaults: run: working-directory: src shell: ${{ matrix.config.shell }} steps: - uses: actions/checkout@v4 with: fetch-depth: 0 persist-credentials: false - name: Download required linux packages if: runner.os == 'Linux' run: | sudo apt update sudo apt install qemu-user - name: Install NDK if: runner.os == 'Linux' run: | if [ $COMP == ndk ]; then NDKV="27.2.12479018" ANDROID_ROOT=/usr/local/lib/android ANDROID_SDK_ROOT=$ANDROID_ROOT/sdk SDKMANAGER=$ANDROID_SDK_ROOT/cmdline-tools/latest/bin/sdkmanager echo "y" | $SDKMANAGER "ndk;$NDKV" ANDROID_NDK_ROOT=$ANDROID_SDK_ROOT/ndk/$NDKV ANDROID_NDK_BIN=$ANDROID_NDK_ROOT/toolchains/llvm/prebuilt/linux-x86_64/bin echo "ANDROID_NDK_BIN=$ANDROID_NDK_BIN" >> $GITHUB_ENV fi - name: Extract the bench number from the 
commit history run: | for hash in $(git rev-list -100 HEAD); do benchref=$(git show -s $hash | tac | grep -m 1 -o -x '[[:space:]]*\b[Bb]ench[ :]\+[1-9][0-9]\{5,7\}\b[[:space:]]*' | sed 's/[^0-9]//g') && break || true done [[ -n "$benchref" ]] && echo "benchref=$benchref" >> $GITHUB_ENV && echo "From commit: $hash" && echo "Reference bench: $benchref" || echo "No bench found" - name: Download the used network from the fishtest framework run: make net - name: Check compiler run: | if [ $COMP == ndk ]; then export PATH=${{ env.ANDROID_NDK_BIN }}:$PATH fi $COMPCXX -v - name: Test help target run: make help - name: Check git run: git --version # Compile profile guided builds - name: Compile ${{ matrix.binaries }} build run: | if [ $COMP == ndk ]; then export PATH=${{ env.ANDROID_NDK_BIN }}:$PATH export LDFLAGS="-static -Wno-unused-command-line-argument" fi make clean make -j4 profile-build ARCH=$BINARY COMP=$COMP RUN_PREFIX=$EMU make strip ARCH=$BINARY COMP=$COMP RUN_PREFIX=$EMU ../tests/signature.sh $benchref mv ./stockfish$EXT ../stockfish-android-$BINARY$EXT - name: Remove non src files run: git clean -fx - name: Upload artifact for (pre)-release uses: actions/upload-artifact@v4 with: name: ${{ matrix.config.simple_name }} ${{ matrix.binaries }} path: | . 
!.git !.output ================================================ FILE: .github/workflows/avx2_compilers.yml ================================================ name: AVX2 Compiler Matrix on: workflow_call: jobs: avx2-compiler-matrix: name: avx2 (${{ matrix.name }}) runs-on: ubuntu-latest container: image: ${{ matrix.image }} strategy: fail-fast: false matrix: include: - { name: gcc-10, comp: gcc, cxx: g++, image: "gcc:10" } - { name: gcc-11, comp: gcc, cxx: g++, image: "gcc:11" } - { name: gcc-12, comp: gcc, cxx: g++, image: "gcc:12" } - { name: gcc-13, comp: gcc, cxx: g++, image: "gcc:13" } - { name: gcc-14, comp: gcc, cxx: g++, image: "gcc:14" } - { name: gcc-15, comp: gcc, cxx: g++, image: "gcc:15" } # Using silkeh/clang for older versions - { name: clang-10, comp: clang, cxx: clang++, image: "silkeh/clang:10", is_clang: true, ver: "10" } - { name: clang-11, comp: clang, cxx: clang++, image: "silkeh/clang:11", is_clang: true, ver: "11" } - { name: clang-12, comp: clang, cxx: clang++, image: "silkeh/clang:12", is_clang: true, ver: "12" } - { name: clang-13, comp: clang, cxx: clang++, image: "silkeh/clang:13", is_clang: true, ver: "13" } - { name: clang-14, comp: clang, cxx: clang++, image: "silkeh/clang:14", is_clang: true, ver: "14" } - { name: clang-15, comp: clang, cxx: clang++, image: "silkeh/clang:15", is_clang: true, ver: "15" } - { name: clang-16, comp: clang, cxx: clang++, image: "silkeh/clang:16", is_clang: true, ver: "16" } - { name: clang-17, comp: clang, cxx: clang++, image: "silkeh/clang:17", is_clang: true, ver: "17" } - { name: clang-18, comp: clang, cxx: clang++-18, image: "ubuntu:rolling", is_clang: true, ver: "18" } - { name: clang-19, comp: clang, cxx: clang++-19, image: "ubuntu:rolling", is_clang: true, ver: "19" } - { name: clang-20, comp: clang, cxx: clang++-20, image: "ubuntu:rolling", is_clang: true, ver: "20" } - { name: clang-21, comp: clang, cxx: clang++-21, image: "ubuntu:rolling", is_clang: true, ver: "21" } steps: - name: Checkout uses: 
actions/checkout@v4 - name: Install dependencies run: | if grep -q "buster" /etc/os-release; then echo "Debian Buster detected. Switching to archive repositories..." echo "deb http://archive.debian.org/debian buster main contrib non-free" > /etc/apt/sources.list echo "deb http://archive.debian.org/debian-security buster/updates main contrib non-free" >> /etc/apt/sources.list echo 'Acquire::Check-Valid-Until "false";' > /etc/apt/apt.conf.d/99-ignore-valid-until fi apt-get update apt-get install -y curl git make - name: Set up Clang if: ${{ matrix.is_clang && matrix.image == 'ubuntu:rolling' }} run: | if [ "${{ matrix.ver }}" -le 20 ]; then apt-get install -y clang-${{ matrix.ver }} else apt-get install -y \ clang-${{ matrix.ver }} \ llvm-${{ matrix.ver }}-dev \ llvm-${{ matrix.ver }}-linker-tools \ lld-${{ matrix.ver }} fi - name: Download network working-directory: src run: make net - name: Build avx2 binary working-directory: src run: | export CXXFLAGS="-Werror" if [ "${{ matrix.ver }}" -ge 20 ]; then apt install -y lld fi make clean make -j build ARCH=x86-64-avx2 COMP=${{ matrix.comp }} COMPCXX=${{ matrix.cxx }} - name: Smoke test working-directory: src run: ./stockfish bench 16 1 6 ================================================ FILE: .github/workflows/clang-format.yml ================================================ # This workflow will run clang-format and comment on the PR. # Because of security reasons, it is crucial that this workflow # executes no shell script nor runs make. 
# Read this before editing: https://securitylab.github.com/research/github-actions-preventing-pwn-requests/ name: Clang-Format on: pull_request_target: branches: - "master" paths: - "**.cpp" - "**.h" permissions: pull-requests: write jobs: Clang-Format: name: Clang-Format runs-on: ubuntu-22.04 steps: - uses: actions/checkout@v4 with: ref: ${{ github.event.pull_request.head.sha }} - name: Run clang-format style check uses: jidicula/clang-format-action@4726374d1aa3c6aecf132e5197e498979588ebc8 # @v4.15.0 id: clang-format continue-on-error: true with: clang-format-version: "20" exclude-regex: "incbin" - name: Comment on PR if: steps.clang-format.outcome == 'failure' uses: thollander/actions-comment-pull-request@fabd468d3a1a0b97feee5f6b9e499eab0dd903f6 # @v2.5.0 with: message: | clang-format 20 needs to be run on this PR. If you do not have clang-format installed, the maintainer will run it when merging. For the exact version please see https://packages.ubuntu.com/plucky/clang-format-20. _(execution **${{ github.run_id }}** / attempt **${{ github.run_attempt }}**)_ comment_tag: execution GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - name: Comment on PR if: steps.clang-format.outcome != 'failure' uses: thollander/actions-comment-pull-request@fabd468d3a1a0b97feee5f6b9e499eab0dd903f6 # @v2.5.0 with: message: | _(execution **${{ github.run_id }}** / attempt **${{ github.run_attempt }}**)_ create_if_not_exists: false comment_tag: execution mode: delete GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} ================================================ FILE: .github/workflows/codeql.yml ================================================ name: "CodeQL" on: push: branches: ["master"] pull_request: # The branches below must be a subset of the branches above branches: ["master"] schedule: - cron: "17 18 * * 1" jobs: analyze: name: Analyze runs-on: ubuntu-latest permissions: actions: read contents: read security-events: write strategy: fail-fast: false matrix: language: ["cpp"] # CodeQL 
supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ] # Use only 'java' to analyze code written in Java, Kotlin, or both # Use only 'javascript' to analyze code written in JavaScript, TypeScript or both # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support steps: - name: Checkout repository uses: actions/checkout@v4 with: persist-credentials: false # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL uses: github/codeql-action/init@v3 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. # By default, queries listed here will override any specified in a config file. # Prefix the list here with "+" to use these queries and those in the config file. # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs # queries: security-extended,security-and-quality - name: Build working-directory: src run: make -j build - name: Perform CodeQL Analysis uses: github/codeql-action/analyze@v3 with: category: "/language:${{matrix.language}}" ================================================ FILE: .github/workflows/compilation.yml ================================================ name: Compilation on: workflow_call: inputs: matrix: type: string required: true jobs: Compilation: name: ${{ matrix.config.name }} ${{ matrix.binaries }} runs-on: ${{ matrix.config.os }} env: COMPCXX: ${{ matrix.config.compiler }} COMP: ${{ matrix.config.comp }} EXT: ${{ matrix.config.ext }} NAME: ${{ matrix.config.simple_name }} BINARY: ${{ matrix.binaries }} SDE: ${{ matrix.config.sde }} strategy: fail-fast: false matrix: ${{ fromJson(inputs.matrix) }} defaults: run: working-directory: src shell: ${{ matrix.config.shell }} steps: - uses: actions/checkout@v4 with: persist-credentials: false - name: 
Install fixed GCC on Linux if: runner.os == 'Linux' uses: egor-tensin/setup-gcc@eaa888eb19115a521fa72b65cd94fe1f25bbcaac # @v1.3 with: version: 11 - name: Setup msys and install required packages if: runner.os == 'Windows' uses: msys2/setup-msys2@v2 with: msystem: ${{ matrix.config.msys_sys }} install: mingw-w64-${{ matrix.config.msys_env }} make git zip - name: Download SDE package if: runner.os == 'Linux' || runner.os == 'Windows' uses: petarpetrovt/setup-sde@f0fa5971dc275704531e94264dd23250c442aa41 # @v2.4 with: environmentVariableName: SDE_DIR sdeVersion: 9.33.0 - name: Download the used network from the fishtest framework run: make net - name: Check compiler run: $COMPCXX -v - name: Test help target run: make help - name: Check git run: git --version - name: Check compiler run: $COMPCXX -v - name: Show compiler cpu info run: | if [[ "$COMPCXX" == clang* ]]; then $COMPCXX -E - -march=native -### else $COMPCXX -Q -march=native --help=target fi # x86-64 with newer extensions tests - name: Compile ${{ matrix.binaries }} build run: | make clean make -j4 profile-build ARCH=$BINARY COMP=$COMP RUN_PREFIX="$SDE" make strip ARCH=$BINARY COMP=$COMP RUN_PREFIX="$SDE" ../tests/signature.sh $benchref mv ./stockfish$EXT ../stockfish-$NAME-$BINARY$EXT - name: Remove non src files run: git clean -fx - name: Upload artifact for (pre)-release uses: actions/upload-artifact@v4 with: name: ${{ matrix.config.simple_name }} ${{ matrix.binaries }} path: | .
!.git !.output ================================================ FILE: .github/workflows/games.yml ================================================ # This workflow will play games with a debug enabled SF using the PR name: Games on: workflow_call: jobs: Matetrack: name: Games runs-on: ubuntu-22.04 steps: - name: Checkout SF repo uses: actions/checkout@v4 with: ref: ${{ github.event.pull_request.head.sha }} path: Stockfish persist-credentials: false - name: build debug enabled version of SF working-directory: Stockfish/src run: make -j build debug=yes - name: Checkout fastchess repo uses: actions/checkout@v4 with: repository: Disservin/fastchess path: fastchess ref: 894616028492ae6114835195f14a899f6fa237d3 persist-credentials: false - name: fastchess build working-directory: fastchess run: make -j - name: Run games working-directory: fastchess run: | ./fastchess -rounds 4 -games 2 -repeat -concurrency 4 -openings file=app/tests/data/openings.epd format=epd order=random -srand $RANDOM\ -engine name=sf1 cmd=/home/runner/work/Stockfish/Stockfish/Stockfish/src/stockfish\ -engine name=sf2 cmd=/home/runner/work/Stockfish/Stockfish/Stockfish/src/stockfish\ -ratinginterval 1 -report penta=true -each proto=uci tc=4+0.04 -log file=fast.log | tee fast.out cat fast.log ! grep "Assertion" fast.log > /dev/null ! 
grep "disconnect" fast.out > /dev/null ================================================ FILE: .github/workflows/iwyu.yml ================================================ name: IWYU on: workflow_call: jobs: Analyzers: name: Check includes runs-on: ubuntu-22.04 defaults: run: working-directory: Stockfish/src shell: bash steps: - name: Checkout Stockfish uses: actions/checkout@v4 with: path: Stockfish persist-credentials: false - name: Checkout include-what-you-use uses: actions/checkout@v4 with: repository: include-what-you-use/include-what-you-use ref: f25caa280dc3277c4086ec345ad279a2463fea0f path: include-what-you-use persist-credentials: false - name: Download required linux packages run: | sudo add-apt-repository 'deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-17 main' wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add - sudo apt update sudo apt install -y libclang-17-dev clang-17 libc++-17-dev - name: Set up include-what-you-use run: | mkdir build && cd build cmake -G "Unix Makefiles" -DCMAKE_PREFIX_PATH="/usr/lib/llvm-17" .. 
sudo make install working-directory: include-what-you-use - name: Check include-what-you-use run: include-what-you-use --version - name: Check includes run: > make analyze COMP=clang CXX=include-what-you-use CXXFLAGS="-stdlib=libc++ -Xiwyu --comment_style=long -Xiwyu --mapping='${{ github.workspace }}/Stockfish/.github/ci/libcxx17.imp' -Xiwyu --error" ================================================ FILE: .github/workflows/matetrack.yml ================================================ # This workflow will run matetrack on the PR name: Matetrack on: workflow_call: jobs: Matetrack: name: Matetrack runs-on: ubuntu-22.04 steps: - name: Checkout SF repo uses: actions/checkout@v4 with: ref: ${{ github.event.pull_request.head.sha }} path: Stockfish persist-credentials: false - name: build SF working-directory: Stockfish/src run: make -j profile-build - name: Checkout matetrack repo uses: actions/checkout@v4 with: repository: vondele/matetrack path: matetrack ref: 6c8405fac9028ca66a077f5c96c918fec0ef8d1d persist-credentials: false - name: matetrack install deps working-directory: matetrack run: pip install -r requirements.txt - name: cache syzygy id: cache-syzygy uses: actions/cache@v4 with: path: | matetrack/3-4-5-wdl/ matetrack/3-4-5-dtz/ key: key-syzygy - name: download syzygy 3-4-5 if needed working-directory: matetrack if: steps.cache-syzygy.outputs.cache-hit != 'true' run: | wget --no-verbose -r -nH --cut-dirs=2 --no-parent --reject="index.html*" -e robots=off https://tablebase.lichess.ovh/tables/standard/3-4-5-wdl/ wget --no-verbose -r -nH --cut-dirs=2 --no-parent --reject="index.html*" -e robots=off https://tablebase.lichess.ovh/tables/standard/3-4-5-dtz/ - name: Run matetrack th1 working-directory: matetrack run: | python matecheck.py --syzygyPath 3-4-5-wdl/:3-4-5-dtz/ --engine /home/runner/work/Stockfish/Stockfish/Stockfish/src/stockfish --epdFile mates2000.epd --nodes 100000 | tee matecheck1.out ! 
grep "issues were detected" matecheck1.out > /dev/null - name: Run matetrack th4 working-directory: matetrack run: | python matecheck.py --syzygyPath 3-4-5-wdl/:3-4-5-dtz/ --engine /home/runner/work/Stockfish/Stockfish/Stockfish/src/stockfish --epdFile mates2000.epd --nodes 100000 --threads 4 | tee matecheck4.out ! grep "issues were detected" matecheck4.out > /dev/null - name: Run matetrack th4 gameplay working-directory: matetrack run: | python matecheck.py --engine /home/runner/work/Stockfish/Stockfish/Stockfish/src/stockfish --epdFile mates2000.epd --time 3 --timeinc 0.01 --threads 4 | tee matecheck4g.out ! grep "issues were detected" matecheck4g.out > /dev/null - name: Run matetrack th4 go-mate working-directory: matetrack run: | head -n 21 matetrack.epd > gomates.epd head -n 44 matedtrack.epd >> gomates.epd head -n 18 mates2000.epd >> gomates.epd python matecheck.py --engine /home/runner/work/Stockfish/Stockfish/Stockfish/src/stockfish --epdFile gomates.epd --mate 0 --threads 4 | tee matecheck4gm.out ! grep "issues were detected" matecheck4gm.out > /dev/null total=$(grep "Total FENs:" matecheck4gm.out | awk '{print $3}') bmates=$(grep "Best mates:" matecheck4gm.out | awk '{print $3}') if [ $bmates -ne $total ]; then echo "At least one go-mate search did not yield expected mate, see matecheck4gm.out" >&2 exit 1 fi - name: Run matetrack th1 with --syzygy50MoveRule false working-directory: matetrack run: | grep 5men cursed.epd > cursed5.epd python matecheck.py --syzygyPath 3-4-5-wdl/:3-4-5-dtz/ --engine /home/runner/work/Stockfish/Stockfish/Stockfish/src/stockfish --epdFile cursed5.epd --nodes 100000 --syzygy50MoveRule false | tee matecheckcursed1.out ! 
grep "issues were detected" matecheckcursed1.out > /dev/null - name: Run matetrack th4 with --syzygy50MoveRule false working-directory: matetrack run: | grep 5men cursed.epd > cursed5.epd python matecheck.py --syzygyPath 3-4-5-wdl/:3-4-5-dtz/ --engine /home/runner/work/Stockfish/Stockfish/Stockfish/src/stockfish --epdFile cursed5.epd --nodes 100000 --threads 4 --syzygy50MoveRule false | tee matecheckcursed4.out ! grep "issues were detected" matecheckcursed4.out > /dev/null - name: Verify mate and TB win count for matecheckcursed[14].out working-directory: matetrack run: | mates=$(grep "Found mates:" matecheckcursed1.out | awk '{print $3}') tbwins=$(grep "Found TB wins:" matecheckcursed1.out | awk '{print $4}') if [ $(($mates + $tbwins)) -ne 32 ]; then echo "Sum of mates and TB wins is not 32 in matecheckcursed1.out" >&2 exit 1 fi mates=$(grep "Found mates:" matecheckcursed4.out | awk '{print $3}') tbwins=$(grep "Found TB wins:" matecheckcursed4.out | awk '{print $4}') if [ $(($mates + $tbwins)) -ne 32 ]; then echo "Sum of mates and TB wins is not 32 in matecheckcursed4.out" >&2 exit 1 fi ================================================ FILE: .github/workflows/sanitizers.yml ================================================ name: Sanitizers on: workflow_call: jobs: Test-under-sanitizers: name: ${{ matrix.sanitizers.name }} runs-on: ${{ matrix.config.os }} env: COMPCXX: ${{ matrix.config.compiler }} COMP: ${{ matrix.config.comp }} CXXFLAGS: "-Werror" strategy: fail-fast: false matrix: config: - name: Ubuntu 22.04 GCC os: ubuntu-22.04 compiler: g++ comp: gcc shell: bash sanitizers: - name: Run with thread sanitizer make_option: sanitize=thread cxx_extra_flags: "" instrumented_option: sanitizer-thread - name: Run with UB sanitizer make_option: sanitize=undefined cxx_extra_flags: "" instrumented_option: sanitizer-undefined - name: Run under valgrind make_option: "" cxx_extra_flags: "" instrumented_option: valgrind - name: Run under valgrind-thread make_option: "" 
cxx_extra_flags: "" instrumented_option: valgrind-thread - name: Run non-instrumented make_option: "" cxx_extra_flags: "" instrumented_option: none - name: Run with glibcxx assertions make_option: "" cxx_extra_flags: -D_GLIBCXX_ASSERTIONS instrumented_option: none defaults: run: working-directory: src shell: ${{ matrix.config.shell }} steps: - uses: actions/checkout@v4 with: persist-credentials: false - name: Download required linux packages run: | sudo apt update sudo apt install expect valgrind g++-multilib - name: Download the used network from the fishtest framework run: make net - name: Check compiler run: $COMPCXX -v - name: Test help target run: make help - name: Check git run: git --version # Since Linux Kernel 6.5 we are getting false positives from the ci, # lower the ASLR entropy to disable ASLR, which works as a temporary workaround. # https://github.com/google/sanitizers/issues/1716 # https://bugs.launchpad.net/ubuntu/+source/linux/+bug/2056762 - name: Lower ASLR entropy run: sudo sysctl -w vm.mmap_rnd_bits=28 # Sanitizers - name: ${{ matrix.sanitizers.name }} run: | export CXXFLAGS="-O1 -fno-inline ${{ matrix.sanitizers.cxx_extra_flags }}" make clean make -j4 ARCH=x86-64-sse41-popcnt ${{ matrix.sanitizers.make_option }} debug=yes optimize=no build > /dev/null python3 ../tests/instrumented.py --${{ matrix.sanitizers.instrumented_option }} ./stockfish ================================================ FILE: .github/workflows/stockfish.yml ================================================ name: Stockfish on: push: tags: - "*" branches: - master - tools - github_ci pull_request: branches: - master - tools jobs: Prerelease: if: github.repository == 'official-stockfish/Stockfish' && (github.ref == 'refs/heads/master' || (startsWith(github.ref_name, 'sf_') && github.ref_type == 'tag')) runs-on: ubuntu-latest needs: [Matrix] permissions: contents: write # For deleting/creating a prerelease steps: - uses: actions/checkout@v4 with: persist-credentials: false #
returns null if no pre-release exists - name: Get Commit SHA of Latest Pre-release run: | # Install required packages sudo apt-get update sudo apt-get install -y curl jq echo "COMMIT_SHA_TAG=$(jq -r 'map(select(.prerelease)) | first | .tag_name' <<< $(curl -s https://api.github.com/repos/${{ github.repository_owner }}/Stockfish/releases))" >> $GITHUB_ENV # delete old previous pre-release and tag - run: gh release delete ${{ env.COMMIT_SHA_TAG }} --cleanup-tag if: env.COMMIT_SHA_TAG != 'null' env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # Make sure that an old ci that still runs on master doesn't recreate a prerelease - name: Check Pullable Commits id: check_commits run: | git fetch CHANGES=$(git rev-list HEAD..origin/master --count) echo "CHANGES=$CHANGES" >> $GITHUB_ENV - name: Get last commit SHA id: last_commit run: echo "COMMIT_SHA=$(git rev-parse HEAD | cut -c 1-8)" >> $GITHUB_ENV - name: Get commit date id: commit_date run: echo "COMMIT_DATE=$(git show -s --date=format:'%Y%m%d' --format=%cd HEAD)" >> $GITHUB_ENV - name: Official Release? id: official_release # Check for "Official release version of Stockfish" in the commit message run: | if git log -1 --pretty=%B | grep -q "Official release version of Stockfish"; then echo "OFFICIAL_RELEASE=true" >> $GITHUB_ENV else echo "OFFICIAL_RELEASE=false" >> $GITHUB_ENV fi # Create a new pre-release, the other upload_binaries.yml will upload the binaries # to this pre-release. 
- name: Create Prerelease if: github.ref_name == 'master' && env.CHANGES == '0' && env.OFFICIAL_RELEASE == 'false' uses: softprops/action-gh-release@4634c16e79c963813287e889244c50009e7f0981 with: name: Stockfish dev-${{ env.COMMIT_DATE }}-${{ env.COMMIT_SHA }} tag_name: stockfish-dev-${{ env.COMMIT_DATE }}-${{ env.COMMIT_SHA }} prerelease: true Matrix: runs-on: ubuntu-latest outputs: matrix: ${{ steps.set-matrix.outputs.matrix }} arm_matrix: ${{ steps.set-arm-matrix.outputs.arm_matrix }} steps: - uses: actions/checkout@v4 with: persist-credentials: false - id: set-matrix run: | TASKS=$(echo $(cat .github/ci/matrix.json) ) echo "MATRIX=$TASKS" >> $GITHUB_OUTPUT - id: set-arm-matrix run: | TASKS_ARM=$(echo $(cat .github/ci/arm_matrix.json) ) echo "ARM_MATRIX=$TASKS_ARM" >> $GITHUB_OUTPUT # Testing Jobs IWYU: uses: ./.github/workflows/iwyu.yml Sanitizers: if: ${{ always() }} uses: ./.github/workflows/sanitizers.yml Tests: if: ${{ always() }} uses: ./.github/workflows/tests.yml Matetrack: if: ${{ always() }} uses: ./.github/workflows/matetrack.yml Games: if: ${{ always() }} uses: ./.github/workflows/games.yml CompilerCheck: if: ${{ always() }} uses: ./.github/workflows/avx2_compilers.yml # Release Jobs Compilation: needs: [Matrix, Sanitizers, Tests, Matetrack, Games, CompilerCheck] uses: ./.github/workflows/compilation.yml with: matrix: ${{ needs.Matrix.outputs.matrix }} ARMCompilation: needs: [Matrix, Sanitizers, Tests, Matetrack, Games, CompilerCheck] uses: ./.github/workflows/arm_compilation.yml with: matrix: ${{ needs.Matrix.outputs.arm_matrix }} Binaries: if: github.repository == 'official-stockfish/Stockfish' needs: [Prerelease, Matrix, Compilation] uses: ./.github/workflows/upload_binaries.yml with: matrix: ${{ needs.Matrix.outputs.matrix }} permissions: contents: write # For deleting/creating a (pre)release secrets: token: ${{ secrets.GITHUB_TOKEN }} ARM_Binaries: if: github.repository == 'official-stockfish/Stockfish' needs: [Prerelease, Matrix, 
ARMCompilation] uses: ./.github/workflows/upload_binaries.yml with: matrix: ${{ needs.Matrix.outputs.arm_matrix }} permissions: contents: write # For deleting/creating a (pre)release secrets: token: ${{ secrets.GITHUB_TOKEN }} ================================================ FILE: .github/workflows/tests.yml ================================================ name: Tests on: workflow_call: jobs: Test-Targets: name: ${{ matrix.config.name }} runs-on: ${{ matrix.config.os }} env: COMPCXX: ${{ matrix.config.compiler }} COMP: ${{ matrix.config.comp }} CXXFLAGS: "-Werror" strategy: fail-fast: false matrix: config: - name: Ubuntu 22.04 GCC os: ubuntu-22.04 compiler: g++ comp: gcc run_32bit_tests: true run_64bit_tests: true shell: bash - name: Ubuntu 22.04 Clang os: ubuntu-22.04 compiler: clang++ comp: clang run_32bit_tests: true run_64bit_tests: true shell: bash - name: Android NDK aarch64 os: ubuntu-22.04 compiler: aarch64-linux-android29-clang++ comp: ndk run_armv8_tests: true shell: bash - name: Android NDK arm os: ubuntu-22.04 compiler: armv7a-linux-androideabi29-clang++ comp: ndk run_armv7_tests: true shell: bash # Currently segfaults in the CI unrelated to a Stockfish change. 
# - name: Linux GCC riscv64 # os: ubuntu-22.04 # compiler: g++ # comp: gcc # run_riscv64_tests: true # base_image: "riscv64/alpine:edge" # platform: linux/riscv64 # shell: bash - name: Linux GCC ppc64 os: ubuntu-22.04 compiler: g++ comp: gcc run_ppc64_tests: true base_image: "ppc64le/alpine:latest" platform: linux/ppc64le shell: bash - name: macOS 15 Apple Clang os: macos-15-intel compiler: clang++ comp: clang run_64bit_tests: true shell: bash - name: macOS 15 Apple Clang M1 os: macos-15 compiler: clang++ comp: clang run_64bit_tests: false run_m1_tests: true shell: bash - name: macOS 15 GCC 11 os: macos-15-intel compiler: g++-11 comp: gcc run_64bit_tests: true shell: bash - name: Windows 2022 Mingw-w64 GCC x86_64 os: windows-2022 compiler: g++ comp: mingw run_64bit_tests: true msys_sys: mingw64 msys_env: x86_64-gcc shell: msys2 {0} - name: Windows 2022 Mingw-w64 GCC i686 os: windows-2022 compiler: g++ comp: mingw run_32bit_tests: true msys_sys: mingw32 msys_env: i686-gcc shell: msys2 {0} - name: Windows 2022 Mingw-w64 Clang x86_64 os: windows-2022 compiler: clang++ comp: clang run_64bit_tests: true msys_sys: clang64 msys_env: clang-x86_64-clang shell: msys2 {0} - name: Windows 11 Mingw-w64 Clang arm64 os: windows-11-arm compiler: clang++ comp: clang run_armv8_tests: true msys_sys: clangarm64 msys_env: clang-aarch64-clang shell: msys2 {0} defaults: run: working-directory: src shell: ${{ matrix.config.shell }} steps: - uses: actions/checkout@v4 with: fetch-depth: 0 persist-credentials: false - name: Download required linux packages if: runner.os == 'Linux' run: | sudo apt update sudo apt install expect valgrind g++-multilib qemu-user-static - name: Install NDK if: runner.os == 'Linux' run: | if [ $COMP == ndk ]; then NDKV="27.2.12479018" ANDROID_ROOT=/usr/local/lib/android ANDROID_SDK_ROOT=$ANDROID_ROOT/sdk SDKMANAGER=$ANDROID_SDK_ROOT/cmdline-tools/latest/bin/sdkmanager echo "y" | $SDKMANAGER "ndk;$NDKV" ANDROID_NDK_ROOT=$ANDROID_SDK_ROOT/ndk/$NDKV 
ANDROID_NDK_BIN=$ANDROID_NDK_ROOT/toolchains/llvm/prebuilt/linux-x86_64/bin echo "ANDROID_NDK_BIN=$ANDROID_NDK_BIN" >> $GITHUB_ENV fi - name: Set up QEMU if: matrix.config.base_image uses: docker/setup-qemu-action@v3 - name: Set up Docker Buildx if: matrix.config.base_image uses: docker/setup-buildx-action@v3 - name: Build Docker container if: matrix.config.base_image run: | docker buildx build --platform ${{ matrix.config.platform }} --load -t sf_builder - << EOF FROM ${{ matrix.config.base_image }} WORKDIR /app RUN apk update && apk add make g++ CMD ["sh", "src/script.sh"] EOF - name: Download required macOS packages if: runner.os == 'macOS' run: brew install coreutils gcc@11 - name: Setup msys and install required packages if: runner.os == 'Windows' uses: msys2/setup-msys2@v2 with: msystem: ${{ matrix.config.msys_sys }} install: mingw-w64-${{ matrix.config.msys_env }} make git expect - name: Download the used network from the fishtest framework run: make net - name: Extract the bench number from the commit history run: | for hash in $(git rev-list -100 HEAD); do benchref=$(git show -s $hash | tac | grep -m 1 -o -x '[[:space:]]*\b[Bb]ench[ :]\+[1-9][0-9]\{5,7\}\b[[:space:]]*' | sed 's/[^0-9]//g') && break || true done [[ -n "$benchref" ]] && echo "benchref=$benchref" >> $GITHUB_ENV && echo "From commit: $hash" && echo "Reference bench: $benchref" || echo "No bench found" - name: Check compiler run: | if [ -z "${{ matrix.config.base_image }}" ]; then if [ $COMP == ndk ]; then export PATH=${{ env.ANDROID_NDK_BIN }}:$PATH fi $COMPCXX -v else echo "$COMPCXX -v" > script.sh docker run --rm --platform ${{ matrix.config.platform }} -v ${{ github.workspace }}:/app sf_builder fi - name: Test help target run: make help - name: Check git run: git --version # x86-32 tests - name: Test debug x86-32 build if: matrix.config.run_32bit_tests run: | export CXXFLAGS="-Werror -D_GLIBCXX_DEBUG" make clean make -j4 ARCH=x86-32 optimize=no debug=yes build ../tests/signature.sh 
$benchref - name: Test x86-32 build if: matrix.config.run_32bit_tests run: | make clean make -j4 ARCH=x86-32 build ../tests/signature.sh $benchref - name: Test x86-32-sse41-popcnt build if: matrix.config.run_32bit_tests run: | make clean make -j4 ARCH=x86-32-sse41-popcnt build ../tests/signature.sh $benchref - name: Test x86-32-sse2 build if: matrix.config.run_32bit_tests run: | make clean make -j4 ARCH=x86-32-sse2 build ../tests/signature.sh $benchref - name: Test general-32 build if: matrix.config.run_32bit_tests run: | make clean make -j4 ARCH=general-32 build ../tests/signature.sh $benchref # x86-64 tests - name: Test debug x86-64-avx2 build if: matrix.config.run_64bit_tests run: | export CXXFLAGS="-Werror -D_GLIBCXX_DEBUG" make clean make -j4 ARCH=x86-64-avx2 optimize=no debug=yes build ../tests/signature.sh $benchref - name: Test x86-64-bmi2 build if: matrix.config.run_64bit_tests run: | make clean make -j4 ARCH=x86-64-bmi2 build ../tests/signature.sh $benchref - name: Test x86-64-avx2 build if: matrix.config.run_64bit_tests run: | make clean make -j4 ARCH=x86-64-avx2 build ../tests/signature.sh $benchref # Test a deprecated arch - name: Test x86-64-modern build if: matrix.config.run_64bit_tests run: | make clean make -j4 ARCH=x86-64-modern build ../tests/signature.sh $benchref - name: Test x86-64-sse41-popcnt build if: matrix.config.run_64bit_tests run: | make clean make -j4 ARCH=x86-64-sse41-popcnt build ../tests/signature.sh $benchref - name: Test x86-64-ssse3 build if: matrix.config.run_64bit_tests run: | make clean make -j4 ARCH=x86-64-ssse3 build ../tests/signature.sh $benchref - name: Test x86-64-sse3-popcnt build if: matrix.config.run_64bit_tests run: | make clean make -j4 ARCH=x86-64-sse3-popcnt build ../tests/signature.sh $benchref - name: Test x86-64 build if: matrix.config.run_64bit_tests run: | make clean make -j4 ARCH=x86-64 build ../tests/signature.sh $benchref - name: Test general-64 build if: matrix.config.run_64bit_tests run: | make clean 
make -j4 ARCH=general-64 build ../tests/signature.sh $benchref - name: Test apple-silicon build if: matrix.config.run_m1_tests run: | make clean make -j4 ARCH=apple-silicon build ../tests/signature.sh $benchref # armv8 tests - name: Test armv8 build if: matrix.config.run_armv8_tests run: | if [ $COMP == ndk ]; then export PATH=${{ env.ANDROID_NDK_BIN }}:$PATH export LDFLAGS="-static -Wno-unused-command-line-argument" fi make clean make -j4 ARCH=armv8 build ../tests/signature.sh $benchref - name: Test armv8-dotprod build if: matrix.config.run_armv8_tests run: | if [ $COMP == ndk ]; then export PATH=${{ env.ANDROID_NDK_BIN }}:$PATH export LDFLAGS="-static -Wno-unused-command-line-argument" fi make clean make -j4 ARCH=armv8-dotprod build ../tests/signature.sh $benchref # armv7 tests - name: Test armv7 build if: matrix.config.run_armv7_tests run: | export PATH=${{ env.ANDROID_NDK_BIN }}:$PATH export LDFLAGS="-static -Wno-unused-command-line-argument" make clean make -j4 ARCH=armv7 build ../tests/signature.sh $benchref - name: Test armv7-neon build if: matrix.config.run_armv7_tests run: | export PATH=${{ env.ANDROID_NDK_BIN }}:$PATH export LDFLAGS="-static -Wno-unused-command-line-argument" make clean make -j4 ARCH=armv7-neon build ../tests/signature.sh $benchref # riscv64 tests - name: Test riscv64 build if: matrix.config.run_riscv64_tests run: | echo "cd src && export LDFLAGS='-static' && make clean && make -j4 ARCH=riscv64 build" > script.sh docker run --rm --platform ${{ matrix.config.platform }} -v ${{ github.workspace }}:/app sf_builder ../tests/signature.sh $benchref # ppc64 tests - name: Test ppc64 build if: matrix.config.run_ppc64_tests run: | echo "cd src && export LDFLAGS='-static' && make clean && make -j4 ARCH=ppc-64 build" > script.sh docker run --rm --platform ${{ matrix.config.platform }} -v ${{ github.workspace }}:/app sf_builder ../tests/signature.sh $benchref # Other tests - name: Check perft and search reproducibility if: 
matrix.config.run_64bit_tests run: | make clean make -j4 ARCH=x86-64-avx2 build ../tests/perft.sh ../tests/reprosearch.sh ================================================ FILE: .github/workflows/upload_binaries.yml ================================================ name: Upload Binaries on: workflow_call: inputs: matrix: type: string required: true secrets: token: required: true jobs: Artifacts: name: ${{ matrix.config.name }} ${{ matrix.binaries }} runs-on: ubuntu-latest env: EXT: ${{ matrix.config.ext }} NAME: ${{ matrix.config.simple_name }} BINARY: ${{ matrix.binaries }} strategy: fail-fast: false matrix: ${{ fromJson(inputs.matrix) }} defaults: run: shell: bash steps: - uses: actions/checkout@v4 with: persist-credentials: false - name: Download artifact from compilation uses: actions/download-artifact@v4 with: name: ${{ matrix.config.simple_name }} ${{ matrix.binaries }} path: ${{ matrix.config.simple_name }} ${{ matrix.binaries }} - name: Create Package run: | mkdir stockfish - name: Download wiki run: | git clone https://github.com/official-stockfish/Stockfish.wiki.git wiki rm -rf wiki/.git mv wiki stockfish/ - name: Copy files run: | mv "${{ matrix.config.simple_name }} ${{ matrix.binaries }}" stockfish-workflow cd stockfish-workflow cp -r src ../stockfish/ cp -r scripts ../stockfish/ cp stockfish-$NAME-$BINARY$EXT ../stockfish/ cp "Top CPU Contributors.txt" ../stockfish/ cp Copying.txt ../stockfish/ cp AUTHORS ../stockfish/ cp CITATION.cff ../stockfish/ cp README.md ../stockfish/ cp CONTRIBUTING.md ../stockfish/ - name: Create tar if: ${{ !startsWith(matrix.config.os, 'windows') }} run: | chmod +x ./stockfish/stockfish-$NAME-$BINARY$EXT tar -cvf stockfish-$NAME-$BINARY.tar stockfish - name: Create zip if: ${{ startsWith(matrix.config.os, 'windows') }} run: | zip -r stockfish-$NAME-$BINARY.zip stockfish - name: Release if: startsWith(github.ref_name, 'sf_') && github.ref_type == 'tag' uses: softprops/action-gh-release@4634c16e79c963813287e889244c50009e7f0981 
with: files: stockfish-${{ matrix.config.simple_name }}-${{ matrix.binaries }}.${{ matrix.config.archive_ext }} token: ${{ secrets.token }} - name: Get last commit sha id: last_commit run: echo "COMMIT_SHA=$(git rev-parse HEAD | cut -c 1-8)" >> $GITHUB_ENV - name: Get commit date id: commit_date run: echo "COMMIT_DATE=$(git show -s --date=format:'%Y%m%d' --format=%cd HEAD)" >> $GITHUB_ENV # Make sure that an old ci that still runs on master doesn't recreate a prerelease - name: Check Pullable Commits id: check_commits run: | git fetch CHANGES=$(git rev-list HEAD..origin/master --count) echo "CHANGES=$CHANGES" >> $GITHUB_ENV - name: Official Release? id: official_release # Check for "Official release version of Stockfish" in the commit message run: | if git log -1 --pretty=%B | grep -q "Official release version of Stockfish"; then echo "OFFICIAL_RELEASE=true" >> $GITHUB_ENV else echo "OFFICIAL_RELEASE=false" >> $GITHUB_ENV fi - name: Prerelease if: github.ref_name == 'master' && env.CHANGES == '0' && env.OFFICIAL_RELEASE == 'false' continue-on-error: true uses: softprops/action-gh-release@4634c16e79c963813287e889244c50009e7f0981 with: name: Stockfish dev-${{ env.COMMIT_DATE }}-${{ env.COMMIT_SHA }} tag_name: stockfish-dev-${{ env.COMMIT_DATE }}-${{ env.COMMIT_SHA }} prerelease: true files: stockfish-${{ matrix.config.simple_name }}-${{ matrix.binaries }}.${{ matrix.config.archive_ext }} token: ${{ secrets.token }} ================================================ FILE: .gitignore ================================================ # Files from build **/*.o **/*.s src/.depend .build_sha.txt .build_date.txt # Built binary src/stockfish* src/-lstdc++.res # Neural network for the NNUE evaluation **/*.nnue # Files generated by the instrumented tests tsan.supp __pycache__/ tests/syzygy tests/bench_tmp.epd ================================================ FILE: AUTHORS ================================================ # Founders of the Stockfish project and Fishtest 
infrastructure Tord Romstad (romstad) Marco Costalba (mcostalba) Joona Kiiski (zamar) Gary Linscott (glinscott) # Authors and inventors of NNUE, training, and NNUE port Yu Nasu (ynasu87) Motohiro Isozaki (yaneurao) Hisayori Noda (nodchip) # All other authors of Stockfish code (in alphabetical order) 87flowers Aditya (absimaldata) Adrian Petrescu (apetresc) Ahmed Kerimov (wcdbmv) Ajith Chandy Jose (ajithcj) Alain Savard (Rocky640) Alayan Feh (Alayan-stk-2) Alexander Kure Alexander Pagel (Lolligerhans) Alfredo Menezes (lonfom169) Ali AlZhrani (Cooffe) AliceRoselia Andreas Jan van der Meulen (Andyson007) Andreas Matthies (Matthies) Andrei Vetrov (proukornew) Andrew Grant (AndyGrant) Andrey Neporada (nepal) Andy Duplain Antoine Champion (antoinechampion) Aram Tumanian (atumanian) Arjun Temurnikar Aron Petkovski (fury) Arseniy Surkov (codedeliveryservice) Artem Solopiy (EntityFX) Auguste Pop Balazs Szilagyi Balint Pfliegel Baptiste Rech (breatn) Ben Chaney (Chaneybenjamini) Ben Koshy (BKSpurgeon) Bill Henry (VoyagerOne) Bojun Guo (noobpwnftw, Nooby) borg323 Boštjan Mejak (PedanticHacker) braich Brian Sheppard (SapphireBrand, briansheppard-toast) Bruno de Melo Costa (BM123499) Bruno Pellanda (pellanda) Bryan Cross (crossbr) candirufish Carlos Esparza Sánchez (ces42) Chess13234 Chris Bao (sscg13) Chris Cain (ceebo) Ciekce clefrks Clemens L. 
(rn5f107s2) Cody Ho (aesrentai) CSTENTOR Dale Weiler (graphitemaster) Daniel Axtens (daxtens) Daniel Dugovic (ddugovic) Daniel Monroe (daniel-monroe) Daniel Samek (DanSamek) Dan Schmidt (dfannius) Dariusz Orzechowski (dorzechowski) David (dav1312) David Zar Daylen Yang (daylen) Deshawn Mohan-Smith (GoldenRare) Dieter Dobbelaere (ddobbelaere) DiscanX Dominik Schlösser (domschl) double-beep Douglas Matos Gomes (dsmsgms) Dubslow Eduardo Cáceres (eduherminio) Eelco de Groot (KingDefender) Ehsan Rashid (erashid) Elvin Liu (solarlight2) erbsenzaehler Ernesto Gatti evqsx Fabian Beuke (madnight) Fabian Fichter (ianfab) Fanael Linithien (Fanael) fanon Fauzi Akram Dabat (fauzi2) Felix Wittmann gamander Gabriele Lombardo (gabe) Gahtan Nahdi Gary Heckman (gheckman) George Sobala (gsobala) gguliash Giacomo Lorenzetti (G-Lorenz) Gian-Carlo Pascutto (gcp) Goh CJ (cj5716) Gontran Lemaire (gonlem) Goodkov Vasiliy Aleksandrovich (goodkov) Gregor Cramer GuardianRM Guy Vreuls (gvreuls) Günther Demetz (pb00067, pb00068) Henri Wiechers Hiraoka Takuya (HiraokaTakuya) homoSapiensSapiens Hongzhi Cheng Ivan Ivec (IIvec) Jacques B. (Timshel) Jake Senne (w1wwwwww) Jakub Ciolek (jake-ciolek) Jan Ondruš (hxim) Jared Kish (Kurtbusch, kurt22i) Jarrod Torriero (DU-jdto) Jasper Shovelton (Beanie496) Jean-Francois Romang (jromang) Jean Gauthier (OuaisBla) Jekaa Jerry Donald Watson (jerrydonaldwatson) jjoshua2 Jonathan Buladas Dumale (SFisGOD) Jonathan Calovski (Mysseno) Jonathan McDermid (jonathanmcdermid) Joost VandeVondele (vondele) Joseph Ellis (jhellis3) Joseph R. 
Prostko Jost Triller (tsoj) Jörg Oster (joergoster) Julian Willemer (NightlyKing) jundery Justin Blanchard (UncombedCoconut) Kazuki Yamashita (KazApps) Kelly Wilson Ken Takusagawa Kenneth Lee (kennethlee33) kevlu8 Kian E (KJE-98) Kieren Pearson (KierenP) kinderchocolate Kiran Panditrao (Krgp) Kirill Zaripov (kokodio) Kojirion Krisztián Peőcz Krystian Kuzniarek (kuzkry) Leonardo Ljubičić (ICCF World Champion) Leonid Pechenik (lp--) Li Ying (yl25946) Liam Keegan (lkeegan) Linmiao Xu (linrock) Linus Arver (listx) loco-loco Lub van den Berg (ElbertoOne) Luca Brivio (lucabrivio) Lucas Braesch (lucasart) Lyudmil Antonov (lantonov) Maciej Żenczykowski (zenczykowski) Malcolm Campbell (xoto10) Mark Marosi (Mapika) Mark Tenzer (31m059) marotear Mathias Parnaudeau (mparnaudeau) Matt Ginsberg (mattginsberg) Matthew Lai (matthewlai) Matthew Sullivan (Matt14916) Max A. (Disservin) Maxim Masiutin (maximmasiutin) Maxim Molchanov (Maxim) Michael An (man) Michael Byrne (MichaelB7) Michael Chaly (Vizvezdenec) Michael Stembera (mstembera) Michael Whiteley (protonspring) Michel Van den Bergh (vdbergh) Miguel Lahoz (miguel-l) Mikael Bäckman (mbootsector) Mike Babigian (Farseer) Mira Miroslav Fontán (Hexik) Moez Jellouli (MJZ1977) Mohammed Li (tthsqe12) Muzhen J (XInTheDark) Nathan Rugg (nmrugg) Nguyen Pham (nguyenpham) Nicklas Persson (NicklasPersson) Nick Pelling (nickpelling) Nicolas Duhamel (nikloskoda) Niklas Fiekas (niklasf) Nikolay Kostov (NikolayIT) Norman Schmidt (FireFather) notruck Nour Berakdar (Nonlinear) Ofek Shochat (OfekShochat, ghostway) Ondrej Mosnáček (WOnder93) Ondřej Mišina (AndrovT) Oskar Werkelin Ahlin Ömer Faruk Tutkun (OmerFarukTutkun) Pablo Vazquez Panthee Pascal Romaret Pasquale Pigazzini (ppigazzini) Patrick Jansen (mibere) Patrick Leonhardt (Yoshie2000) Peter Schneider (pschneider1968) Peter Zsifkovits (CoffeeOne) Pieter te Brake (pieterteb) PikaCat Praveen Kumar Tummala (praveentml) Prokop Randáček (ProkopRandacek) Rahul Dsilva (silversolver1) Ralph Stößer 
(Ralph Stoesser) Raminder Singh renouve Reuven Peleg (R-Peleg) Richard Lloyd (Richard-Lloyd) Robert Nürnberg (robertnurnberg) Rodrigo Exterckötter Tjäder Rodrigo Roim (roim) Ronald de Man (syzygy1, syzygy) Ron Britvich (Britvich) rqs Rui Coelho (ruicoelhopedro) rustam-cpp Ryan Hirsch Ryan Schmitt Ryan Takker Sami Kiminki (skiminki) Sebastian Buchwald (UniQP) Sergei Antonov (saproj) Sergei Ivanov (svivanov72) Sergio Vieri (sergiovieri) sf-x Shahin M. Shahin (peregrine) Shane Booth (shane31) Shawn Varghese (xXH4CKST3RXx) Shawn Xu (xu-shawn) Siad Daboul (Topologist) Stefan Geschwentner (locutus2) Stefano Cardanobile (Stefano80) Stefano Di Martino (StefanoD) Steinar Gunderson (sesse) Stéphane Nicolet (snicolet) Stephen Touset (stouset) Stockfisher69 Styx (styxdoto) Syine Mineta (MinetaS) Taras Vuk (TarasVuk) Thanar2 thaspel theo77186 TierynnB Timothy Herchen (anematode) Ting-Hsuan Huang (fffelix-huang) Tobias Steinmann Tomasz Sobczyk (Sopel97) Tom Truscott Tom Vijlbrief (tomtor) Torsten Franz (torfranz, tfranzer) Torsten Hellwig (Torom) Tracey Emery (basepr1me) tttak Unai Corzo (unaiic) Uri Blass (uriblass) Vince Negri (cuddlestmonkey) Viren Wencey Wang Will Miles (willm) windfishballad xefoci7612 Xiang Wang (KatyushaScarlet) Yen-Chao Shen (lemteay) ZlomenyMesic zz4032 # Additionally, we acknowledge the authors and maintainers of fishtest, # an amazing and essential framework for Stockfish development! # # https://github.com/official-stockfish/fishtest/blob/master/AUTHORS ================================================ FILE: CITATION.cff ================================================ # This CITATION.cff file was generated with cffinit. # Visit https://bit.ly/cffinit to generate yours today! cff-version: 1.2.0 title: Stockfish message: >- Please cite this software using the metadata from this file. 
type: software authors: - name: The Stockfish developers (see AUTHORS file) repository-code: 'https://github.com/official-stockfish/Stockfish' url: 'https://stockfishchess.org/' repository-artifact: 'https://stockfishchess.org/download/' abstract: Stockfish is a free and strong UCI chess engine. keywords: - chess - artificial intelligence (AI) - tree search - alpha-beta search - neural networks (NN) - efficiently updatable neural networks (NNUE) license: GPL-3.0 ================================================ FILE: CONTRIBUTING.md ================================================ # Contributing to Stockfish Welcome to the Stockfish project! We are excited that you are interested in contributing. This document outlines the guidelines and steps to follow when making contributions to Stockfish. ## Table of Contents - [Building Stockfish](#building-stockfish) - [Making Contributions](#making-contributions) - [Reporting Issues](#reporting-issues) - [Submitting Pull Requests](#submitting-pull-requests) - [Code Style](#code-style) - [Community and Communication](#community-and-communication) - [License](#license) ## Building Stockfish In case you do not have a C++ compiler installed, you can follow the instructions from our wiki. - [Ubuntu][ubuntu-compiling-link] - [Windows][windows-compiling-link] - [macOS][macos-compiling-link] ## Making Contributions ### Reporting Issues If you find a bug, please open an issue on the [issue tracker][issue-tracker-link]. Be sure to include relevant information like your operating system, build environment, and a detailed description of the problem. _Please note that Stockfish's development is not focused on adding new features. Thus any issue regarding missing features will potentially be closed without further discussion._ ### Submitting Pull Requests - Functional changes need to be tested on fishtest. See [Creating my First Test][creating-my-first-test] for more details. 
The accompanying pull request should include a link to the test results and the new bench. - Non-functional changes (e.g. refactoring, code style, documentation) do not need to be tested on fishtest, unless they might impact performance. - Provide a clear and concise description of the changes in the pull request description. _First-time contributors should add their name to [AUTHORS](./AUTHORS)._ _Stockfish's development is not focused on adding new features. Thus, any pull request introducing new features will potentially be closed without further discussion._ ## Code Style Changes to Stockfish C++ code should respect our coding style defined by [.clang-format](.clang-format). You can format your changes by running `make format`. This requires clang-format version 20 to be installed on your system. ## Navigate For experienced Git users who frequently use `git blame`, it is recommended to configure the `blame.ignoreRevsFile` setting. This setting is useful for excluding noisy formatting commits. ```bash git config blame.ignoreRevsFile .git-blame-ignore-revs ``` ## Community and Communication - Join the [Stockfish Discord][discord-link] to discuss ideas, issues, and development. - Participate in the [Stockfish GitHub discussions][discussions-link] for broader conversations. ## License By contributing to Stockfish, you agree that your contributions will be licensed under the GNU General Public License v3.0. See [Copying.txt][copying-link] for more details. Thank you for contributing to Stockfish and helping us make it even better!
[copying-link]: https://github.com/official-stockfish/Stockfish/blob/master/Copying.txt [discord-link]: https://discord.gg/GWDRS3kU6R [discussions-link]: https://github.com/official-stockfish/Stockfish/discussions/new [creating-my-first-test]: https://github.com/official-stockfish/fishtest/wiki/Creating-my-first-test#create-your-test [issue-tracker-link]: https://github.com/official-stockfish/Stockfish/issues [ubuntu-compiling-link]: https://github.com/official-stockfish/Stockfish/wiki/Developers#user-content-installing-a-compiler-1 [windows-compiling-link]: https://github.com/official-stockfish/Stockfish/wiki/Developers#user-content-installing-a-compiler [macos-compiling-link]: https://github.com/official-stockfish/Stockfish/wiki/Developers#user-content-installing-a-compiler-2 ================================================ FILE: Copying.txt ================================================ GNU GENERAL PUBLIC LICENSE Version 3, 29 June 2007 Copyright (C) 2007 Free Software Foundation, Inc. Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The GNU General Public License is a free, copyleft license for software and other kinds of works. The licenses for most software and other practical works are designed to take away your freedom to share and change the works. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change all versions of a program--to make sure it remains free software for all its users. We, the Free Software Foundation, use the GNU General Public License for most of our software; it applies also to any other work released this way by its authors. You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. 
Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for them if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs, and that you know you can do these things. To protect your rights, we need to prevent others from denying you these rights or asking you to surrender the rights. Therefore, you have certain responsibilities if you distribute copies of the software, or if you modify it: responsibilities to respect the freedom of others. For example, if you distribute copies of such a program, whether gratis or for a fee, you must pass on to the recipients the same freedoms that you received. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. Developers that use the GNU GPL protect your rights with two steps: (1) assert copyright on the software, and (2) offer you this License giving you legal permission to copy, distribute and/or modify it. For the developers' and authors' protection, the GPL clearly explains that there is no warranty for this free software. For both users' and authors' sake, the GPL requires that modified versions be marked as changed, so that their problems will not be attributed erroneously to authors of previous versions. Some devices are designed to deny users access to install or run modified versions of the software inside them, although the manufacturer can do so. This is fundamentally incompatible with the aim of protecting users' freedom to change the software. The systematic pattern of such abuse occurs in the area of products for individuals to use, which is precisely where it is most unacceptable. Therefore, we have designed this version of the GPL to prohibit the practice for those products. 
If such problems arise substantially in other domains, we stand ready to extend this provision to those domains in future versions of the GPL, as needed to protect the freedom of users. Finally, every program is threatened constantly by software patents. States should not allow patents to restrict development and use of software on general-purpose computers, but in those that do, we wish to avoid the special danger that patents applied to a free program could make it effectively proprietary. To prevent this, the GPL assures that patents cannot be used to render the program non-free. The precise terms and conditions for copying, distribution and modification follow. TERMS AND CONDITIONS 0. Definitions. "This License" refers to version 3 of the GNU General Public License. "Copyright" also means copyright-like laws that apply to other kinds of works, such as semiconductor masks. "The Program" refers to any copyrightable work licensed under this License. Each licensee is addressed as "you". "Licensees" and "recipients" may be individuals or organizations. To "modify" a work means to copy from or adapt all or part of the work in a fashion requiring copyright permission, other than the making of an exact copy. The resulting work is called a "modified version" of the earlier work or a work "based on" the earlier work. A "covered work" means either the unmodified Program or a work based on the Program. To "propagate" a work means to do anything with it that, without permission, would make you directly or secondarily liable for infringement under applicable copyright law, except executing it on a computer or modifying a private copy. Propagation includes copying, distribution (with or without modification), making available to the public, and in some countries other activities as well. To "convey" a work means any kind of propagation that enables other parties to make or receive copies. 
Mere interaction with a user through a computer network, with no transfer of a copy, is not conveying. An interactive user interface displays "Appropriate Legal Notices" to the extent that it includes a convenient and prominently visible feature that (1) displays an appropriate copyright notice, and (2) tells the user that there is no warranty for the work (except to the extent that warranties are provided), that licensees may convey the work under this License, and how to view a copy of this License. If the interface presents a list of user commands or options, such as a menu, a prominent item in the list meets this criterion. 1. Source Code. The "source code" for a work means the preferred form of the work for making modifications to it. "Object code" means any non-source form of a work. A "Standard Interface" means an interface that either is an official standard defined by a recognized standards body, or, in the case of interfaces specified for a particular programming language, one that is widely used among developers working in that language. The "System Libraries" of an executable work include anything, other than the work as a whole, that (a) is included in the normal form of packaging a Major Component, but which is not part of that Major Component, and (b) serves only to enable use of the work with that Major Component, or to implement a Standard Interface for which an implementation is available to the public in source code form. A "Major Component", in this context, means a major essential component (kernel, window system, and so on) of the specific operating system (if any) on which the executable work runs, or a compiler used to produce the work, or an object code interpreter used to run it. The "Corresponding Source" for a work in object code form means all the source code needed to generate, install, and (for an executable work) run the object code and to modify the work, including scripts to control those activities. 
However, it does not include the work's System Libraries, or general-purpose tools or generally available free programs which are used unmodified in performing those activities but which are not part of the work. For example, Corresponding Source includes interface definition files associated with source files for the work, and the source code for shared libraries and dynamically linked subprograms that the work is specifically designed to require, such as by intimate data communication or control flow between those subprograms and other parts of the work. The Corresponding Source need not include anything that users can regenerate automatically from other parts of the Corresponding Source. The Corresponding Source for a work in source code form is that same work. 2. Basic Permissions. All rights granted under this License are granted for the term of copyright on the Program, and are irrevocable provided the stated conditions are met. This License explicitly affirms your unlimited permission to run the unmodified Program. The output from running a covered work is covered by this License only if the output, given its content, constitutes a covered work. This License acknowledges your rights of fair use or other equivalent, as provided by copyright law. You may make, run and propagate covered works that you do not convey, without conditions so long as your license otherwise remains in force. You may convey covered works to others for the sole purpose of having them make modifications exclusively for you, or provide you with facilities for running those works, provided that you comply with the terms of this License in conveying all material for which you do not control copyright. Those thus making or running the covered works for you must do so exclusively on your behalf, under your direction and control, on terms that prohibit them from making any copies of your copyrighted material outside their relationship with you. 
Conveying under any other circumstances is permitted solely under the conditions stated below. Sublicensing is not allowed; section 10 makes it unnecessary. 3. Protecting Users' Legal Rights From Anti-Circumvention Law. No covered work shall be deemed part of an effective technological measure under any applicable law fulfilling obligations under article 11 of the WIPO copyright treaty adopted on 20 December 1996, or similar laws prohibiting or restricting circumvention of such measures. When you convey a covered work, you waive any legal power to forbid circumvention of technological measures to the extent such circumvention is effected by exercising rights under this License with respect to the covered work, and you disclaim any intention to limit operation or modification of the work as a means of enforcing, against the work's users, your or third parties' legal rights to forbid circumvention of technological measures. 4. Conveying Verbatim Copies. You may convey verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice; keep intact all notices stating that this License and any non-permissive terms added in accord with section 7 apply to the code; keep intact all notices of the absence of any warranty; and give all recipients a copy of this License along with the Program. You may charge any price or no price for each copy that you convey, and you may offer support or warranty protection for a fee. 5. Conveying Modified Source Versions. You may convey a work based on the Program, or the modifications to produce it from the Program, in the form of source code under the terms of section 4, provided that you also meet all of these conditions: a) The work must carry prominent notices stating that you modified it, and giving a relevant date. 
b) The work must carry prominent notices stating that it is released under this License and any conditions added under section 7. This requirement modifies the requirement in section 4 to "keep intact all notices". c) You must license the entire work, as a whole, under this License to anyone who comes into possession of a copy. This License will therefore apply, along with any applicable section 7 additional terms, to the whole of the work, and all its parts, regardless of how they are packaged. This License gives no permission to license the work in any other way, but it does not invalidate such permission if you have separately received it. d) If the work has interactive user interfaces, each must display Appropriate Legal Notices; however, if the Program has interactive interfaces that do not display Appropriate Legal Notices, your work need not make them do so. A compilation of a covered work with other separate and independent works, which are not by their nature extensions of the covered work, and which are not combined with it such as to form a larger program, in or on a volume of a storage or distribution medium, is called an "aggregate" if the compilation and its resulting copyright are not used to limit the access or legal rights of the compilation's users beyond what the individual works permit. Inclusion of a covered work in an aggregate does not cause this License to apply to the other parts of the aggregate. 6. Conveying Non-Source Forms. You may convey a covered work in object code form under the terms of sections 4 and 5, provided that you also convey the machine-readable Corresponding Source under the terms of this License, in one of these ways: a) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by the Corresponding Source fixed on a durable physical medium customarily used for software interchange. 
b) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by a written offer, valid for at least three years and valid for as long as you offer spare parts or customer support for that product model, to give anyone who possesses the object code either (1) a copy of the Corresponding Source for all the software in the product that is covered by this License, on a durable physical medium customarily used for software interchange, for a price no more than your reasonable cost of physically performing this conveying of source, or (2) access to copy the Corresponding Source from a network server at no charge. c) Convey individual copies of the object code with a copy of the written offer to provide the Corresponding Source. This alternative is allowed only occasionally and noncommercially, and only if you received the object code with such an offer, in accord with subsection 6b. d) Convey the object code by offering access from a designated place (gratis or for a charge), and offer equivalent access to the Corresponding Source in the same way through the same place at no further charge. You need not require recipients to copy the Corresponding Source along with the object code. If the place to copy the object code is a network server, the Corresponding Source may be on a different server (operated by you or a third party) that supports equivalent copying facilities, provided you maintain clear directions next to the object code saying where to find the Corresponding Source. Regardless of what server hosts the Corresponding Source, you remain obligated to ensure that it is available for as long as needed to satisfy these requirements. e) Convey the object code using peer-to-peer transmission, provided you inform other peers where the object code and Corresponding Source of the work are being offered to the general public at no charge under subsection 6d. 
A separable portion of the object code, whose source code is excluded from the Corresponding Source as a System Library, need not be included in conveying the object code work. A "User Product" is either (1) a "consumer product", which means any tangible personal property which is normally used for personal, family, or household purposes, or (2) anything designed or sold for incorporation into a dwelling. In determining whether a product is a consumer product, doubtful cases shall be resolved in favor of coverage. For a particular product received by a particular user, "normally used" refers to a typical or common use of that class of product, regardless of the status of the particular user or of the way in which the particular user actually uses, or expects or is expected to use, the product. A product is a consumer product regardless of whether the product has substantial commercial, industrial or non-consumer uses, unless such uses represent the only significant mode of use of the product. "Installation Information" for a User Product means any methods, procedures, authorization keys, or other information required to install and execute modified versions of a covered work in that User Product from a modified version of its Corresponding Source. The information must suffice to ensure that the continued functioning of the modified object code is in no case prevented or interfered with solely because modification has been made. If you convey an object code work under this section in, or with, or specifically for use in, a User Product, and the conveying occurs as part of a transaction in which the right of possession and use of the User Product is transferred to the recipient in perpetuity or for a fixed term (regardless of how the transaction is characterized), the Corresponding Source conveyed under this section must be accompanied by the Installation Information. 
But this requirement does not apply if neither you nor any third party retains the ability to install modified object code on the User Product (for example, the work has been installed in ROM). The requirement to provide Installation Information does not include a requirement to continue to provide support service, warranty, or updates for a work that has been modified or installed by the recipient, or for the User Product in which it has been modified or installed. Access to a network may be denied when the modification itself materially and adversely affects the operation of the network or violates the rules and protocols for communication across the network. Corresponding Source conveyed, and Installation Information provided, in accord with this section must be in a format that is publicly documented (and with an implementation available to the public in source code form), and must require no special password or key for unpacking, reading or copying. 7. Additional Terms. "Additional permissions" are terms that supplement the terms of this License by making exceptions from one or more of its conditions. Additional permissions that are applicable to the entire Program shall be treated as though they were included in this License, to the extent that they are valid under applicable law. If additional permissions apply only to part of the Program, that part may be used separately under those permissions, but the entire Program remains governed by this License without regard to the additional permissions. When you convey a copy of a covered work, you may at your option remove any additional permissions from that copy, or from any part of it. (Additional permissions may be written to require their own removal in certain cases when you modify the work.) You may place additional permissions on material, added by you to a covered work, for which you have or can give appropriate copyright permission. 
Notwithstanding any other provision of this License, for material you add to a covered work, you may (if authorized by the copyright holders of that material) supplement the terms of this License with terms: a) Disclaiming warranty or limiting liability differently from the terms of sections 15 and 16 of this License; or b) Requiring preservation of specified reasonable legal notices or author attributions in that material or in the Appropriate Legal Notices displayed by works containing it; or c) Prohibiting misrepresentation of the origin of that material, or requiring that modified versions of such material be marked in reasonable ways as different from the original version; or d) Limiting the use for publicity purposes of names of licensors or authors of the material; or e) Declining to grant rights under trademark law for use of some trade names, trademarks, or service marks; or f) Requiring indemnification of licensors and authors of that material by anyone who conveys the material (or modified versions of it) with contractual assumptions of liability to the recipient, for any liability that these contractual assumptions directly impose on those licensors and authors. All other non-permissive additional terms are considered "further restrictions" within the meaning of section 10. If the Program as you received it, or any part of it, contains a notice stating that it is governed by this License along with a term that is a further restriction, you may remove that term. If a license document contains a further restriction but permits relicensing or conveying under this License, you may add to a covered work material governed by the terms of that license document, provided that the further restriction does not survive such relicensing or conveying. 
If you add terms to a covered work in accord with this section, you must place, in the relevant source files, a statement of the additional terms that apply to those files, or a notice indicating where to find the applicable terms. Additional terms, permissive or non-permissive, may be stated in the form of a separately written license, or stated as exceptions; the above requirements apply either way. 8. Termination. You may not propagate or modify a covered work except as expressly provided under this License. Any attempt otherwise to propagate or modify it is void, and will automatically terminate your rights under this License (including any patent licenses granted under the third paragraph of section 11). However, if you cease all violation of this License, then your license from a particular copyright holder is reinstated (a) provisionally, unless and until the copyright holder explicitly and finally terminates your license, and (b) permanently, if the copyright holder fails to notify you of the violation by some reasonable means prior to 60 days after the cessation. Moreover, your license from a particular copyright holder is reinstated permanently if the copyright holder notifies you of the violation by some reasonable means, this is the first time you have received notice of violation of this License (for any work) from that copyright holder, and you cure the violation prior to 30 days after your receipt of the notice. Termination of your rights under this section does not terminate the licenses of parties who have received copies or rights from you under this License. If your rights have been terminated and not permanently reinstated, you do not qualify to receive new licenses for the same material under section 10. 9. Acceptance Not Required for Having Copies. You are not required to accept this License in order to receive or run a copy of the Program. 
Ancillary propagation of a covered work occurring solely as a consequence of using peer-to-peer transmission to receive a copy likewise does not require acceptance. However, nothing other than this License grants you permission to propagate or modify any covered work. These actions infringe copyright if you do not accept this License. Therefore, by modifying or propagating a covered work, you indicate your acceptance of this License to do so. 10. Automatic Licensing of Downstream Recipients. Each time you convey a covered work, the recipient automatically receives a license from the original licensors, to run, modify and propagate that work, subject to this License. You are not responsible for enforcing compliance by third parties with this License. An "entity transaction" is a transaction transferring control of an organization, or substantially all assets of one, or subdividing an organization, or merging organizations. If propagation of a covered work results from an entity transaction, each party to that transaction who receives a copy of the work also receives whatever licenses to the work the party's predecessor in interest had or could give under the previous paragraph, plus a right to possession of the Corresponding Source of the work from the predecessor in interest, if the predecessor has it or can get it with reasonable efforts. You may not impose any further restrictions on the exercise of the rights granted or affirmed under this License. For example, you may not impose a license fee, royalty, or other charge for exercise of rights granted under this License, and you may not initiate litigation (including a cross-claim or counterclaim in a lawsuit) alleging that any patent claim is infringed by making, using, selling, offering for sale, or importing the Program or any portion of it. 11. Patents. A "contributor" is a copyright holder who authorizes use under this License of the Program or a work on which the Program is based. 
The work thus licensed is called the contributor's "contributor version". A contributor's "essential patent claims" are all patent claims owned or controlled by the contributor, whether already acquired or hereafter acquired, that would be infringed by some manner, permitted by this License, of making, using, or selling its contributor version, but do not include claims that would be infringed only as a consequence of further modification of the contributor version. For purposes of this definition, "control" includes the right to grant patent sublicenses in a manner consistent with the requirements of this License. Each contributor grants you a non-exclusive, worldwide, royalty-free patent license under the contributor's essential patent claims, to make, use, sell, offer for sale, import and otherwise run, modify and propagate the contents of its contributor version. In the following three paragraphs, a "patent license" is any express agreement or commitment, however denominated, not to enforce a patent (such as an express permission to practice a patent or covenant not to sue for patent infringement). To "grant" such a patent license to a party means to make such an agreement or commitment not to enforce a patent against the party. If you convey a covered work, knowingly relying on a patent license, and the Corresponding Source of the work is not available for anyone to copy, free of charge and under the terms of this License, through a publicly available network server or other readily accessible means, then you must either (1) cause the Corresponding Source to be so available, or (2) arrange to deprive yourself of the benefit of the patent license for this particular work, or (3) arrange, in a manner consistent with the requirements of this License, to extend the patent license to downstream recipients. 
"Knowingly relying" means you have actual knowledge that, but for the patent license, your conveying the covered work in a country, or your recipient's use of the covered work in a country, would infringe one or more identifiable patents in that country that you have reason to believe are valid. If, pursuant to or in connection with a single transaction or arrangement, you convey, or propagate by procuring conveyance of, a covered work, and grant a patent license to some of the parties receiving the covered work authorizing them to use, propagate, modify or convey a specific copy of the covered work, then the patent license you grant is automatically extended to all recipients of the covered work and works based on it. A patent license is "discriminatory" if it does not include within the scope of its coverage, prohibits the exercise of, or is conditioned on the non-exercise of one or more of the rights that are specifically granted under this License. You may not convey a covered work if you are a party to an arrangement with a third party that is in the business of distributing software, under which you make payment to the third party based on the extent of your activity of conveying the work, and under which the third party grants, to any of the parties who would receive the covered work from you, a discriminatory patent license (a) in connection with copies of the covered work conveyed by you (or copies made from those copies), or (b) primarily for and in connection with specific products or compilations that contain the covered work, unless you entered into that arrangement, or that patent license was granted, prior to 28 March 2007. Nothing in this License shall be construed as excluding or limiting any implied license or other defenses to infringement that may otherwise be available to you under applicable patent law. 12. No Surrender of Others' Freedom. 
If conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot convey a covered work so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not convey it at all. For example, if you agree to terms that obligate you to collect a royalty for further conveying from those to whom you convey the Program, the only way you could satisfy both those terms and this License would be to refrain entirely from conveying the Program. 13. Use with the GNU Affero General Public License. Notwithstanding any other provision of this License, you have permission to link or combine any covered work with a work licensed under version 3 of the GNU Affero General Public License into a single combined work, and to convey the resulting work. The terms of this License will continue to apply to the part which is the covered work, but the special requirements of the GNU Affero General Public License, section 13, concerning interaction through a network will apply to the combination as such. 14. Revised Versions of this License. The Free Software Foundation may publish revised and/or new versions of the GNU General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies that a certain numbered version of the GNU General Public License "or any later version" applies to it, you have the option of following the terms and conditions either of that numbered version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of the GNU General Public License, you may choose any version ever published by the Free Software Foundation. 
If the Program specifies that a proxy can decide which future versions of the GNU General Public License can be used, that proxy's public statement of acceptance of a version permanently authorizes you to choose that version for the Program. Later license versions may give you additional or different permissions. However, no additional obligations are imposed on any author or copyright holder as a result of your choosing to follow a later version. 15. Disclaimer of Warranty. THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 16. Limitation of Liability. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 17. Interpretation of Sections 15 and 16. 
If the disclaimer of warranty and limitation of liability provided above cannot be given local legal effect according to their terms, reviewing courts shall apply local law that most closely approximates an absolute waiver of all civil liability in connection with the Program, unless a warranty or assumption of liability accompanies a copy of the Program in return for a fee. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively state the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. <one line to give the program's name and a brief idea of what it does.> Copyright (C) <year> <name of author> This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see <https://www.gnu.org/licenses/>. Also add information on how to contact you by electronic and paper mail. If the program does terminal interaction, make it output a short notice like this when it starts in an interactive mode: <program> Copyright (C) <year> <name of author> This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details.
The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, your program's commands might be different; for a GUI interface, you would use an "about box". You should also get your employer (if you work as a programmer) or school, if any, to sign a "copyright disclaimer" for the program, if necessary. For more information on this, and how to apply and follow the GNU GPL, see <https://www.gnu.org/licenses/>. The GNU General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. But first, please read <https://www.gnu.org/licenses/why-not-lgpl.html>. ================================================ FILE: README.md ================================================
[![Stockfish][stockfish128-logo]][website-link]

Stockfish

A free and strong UCI chess engine.
[Explore Stockfish docs »][wiki-link]

[Report bug][issue-link] · [Open a discussion][discussions-link] · [Discord][discord-link] · [Blog][website-blog-link] [![Build][build-badge]][build-link] [![License][license-badge]][license-link]
[![Release][release-badge]][release-link] [![Commits][commits-badge]][commits-link]
[![Website][website-badge]][website-link] [![Fishtest][fishtest-badge]][fishtest-link] [![Discord][discord-badge]][discord-link]
## Overview

[Stockfish][website-link] is a **free and strong UCI chess engine** derived from Glaurung 2.1 that analyzes chess positions and computes the optimal moves.

Stockfish **does not include a graphical user interface** (GUI) that is required to display a chessboard and to make it easy to input moves. These GUIs are developed independently from Stockfish and are available online. **Read the documentation for your GUI** of choice for information about how to use Stockfish with it.

See also the Stockfish [documentation][wiki-usage-link] for further usage help.

## Files

This distribution of Stockfish consists of the following files:

* [README.md][readme-link], the file you are currently reading.
* [Copying.txt][license-link], a text file containing the GNU General Public License version 3.
* [AUTHORS][authors-link], a text file with the list of authors for the project.
* [src][src-link], a subdirectory containing the full source code, including a Makefile that can be used to compile Stockfish on Unix-like systems.
* a file with the .nnue extension, storing the neural network for the NNUE evaluation. Binary distributions will have this file embedded.

## Contributing

__See [Contributing Guide](CONTRIBUTING.md).__

### Donating hardware

Improving Stockfish requires a massive amount of testing. You can donate your hardware resources by installing the [Fishtest Worker][worker-link] and viewing the current tests on [Fishtest][fishtest-link].

### Improving the code

In the [chessprogramming wiki][programming-link], many techniques used in Stockfish are explained with a lot of background information. The [section on Stockfish][programmingsf-link] describes many features and techniques used by Stockfish. However, it is generic rather than focused on Stockfish's precise implementation.

The engine testing is done on [Fishtest][fishtest-link].
If you want to help improve Stockfish, please read this [guideline][guideline-link] first, where the basics of Stockfish development are explained.

Discussions about Stockfish take place these days mainly in the Stockfish [Discord server][discord-link]. This is also the best place to ask questions about the codebase and how to improve it.

## Compiling Stockfish

Stockfish has support for 32 or 64-bit CPUs, certain hardware instructions, big-endian machines such as Power PC, and other platforms.

On Unix-like systems, it should be easy to compile Stockfish directly from the source code with the included Makefile in the folder `src`. In general, it is recommended to run `make help` to see a list of make targets with corresponding descriptions. An example suitable for most Intel and AMD chips:

```
cd src
make -j profile-build
```

Detailed compilation instructions for all platforms can be found in our [documentation][wiki-compile-link]. Our wiki also has information about the [UCI commands][wiki-uci-link] supported by Stockfish.

## Terms of use

Stockfish is free and distributed under the [**GNU General Public License version 3**][license-link] (GPL v3). Essentially, this means you are free to do almost exactly what you want with the program, including distributing it among your friends, making it available for download from your website, selling it (either by itself or as part of some bigger software package), or using it as the starting point for a software project of your own.

The only real limitation is that whenever you distribute Stockfish in some way, you MUST always include the license and the full source code (or a pointer to where the source code can be found) to generate the exact binary you are distributing. If you make any changes to the source code, these changes must also be made available under GPL v3.
## Acknowledgements Stockfish uses neural networks trained on [data provided by the Leela Chess Zero project][lc0-data-link], which is made available under the [Open Database License][odbl-link] (ODbL). [authors-link]: https://github.com/official-stockfish/Stockfish/blob/master/AUTHORS [build-link]: https://github.com/official-stockfish/Stockfish/actions/workflows/stockfish.yml [commits-link]: https://github.com/official-stockfish/Stockfish/commits/master [discord-link]: https://discord.gg/GWDRS3kU6R [issue-link]: https://github.com/official-stockfish/Stockfish/issues/new?assignees=&labels=&template=BUG-REPORT.yml [discussions-link]: https://github.com/official-stockfish/Stockfish/discussions/new [fishtest-link]: https://tests.stockfishchess.org/tests [guideline-link]: https://github.com/official-stockfish/fishtest/wiki/Creating-my-first-test [license-link]: https://github.com/official-stockfish/Stockfish/blob/master/Copying.txt [programming-link]: https://www.chessprogramming.org/Main_Page [programmingsf-link]: https://www.chessprogramming.org/Stockfish [readme-link]: https://github.com/official-stockfish/Stockfish/blob/master/README.md [release-link]: https://github.com/official-stockfish/Stockfish/releases/latest [src-link]: https://github.com/official-stockfish/Stockfish/tree/master/src [stockfish128-logo]: https://stockfishchess.org/images/logo/icon_128x128.png [uci-link]: https://backscattering.de/chess/uci/ [website-link]: https://stockfishchess.org [website-blog-link]: https://stockfishchess.org/blog/ [wiki-link]: https://github.com/official-stockfish/Stockfish/wiki [wiki-compile-link]: https://github.com/official-stockfish/Stockfish/wiki/Compiling-from-source [wiki-uci-link]: https://github.com/official-stockfish/Stockfish/wiki/UCI-&-Commands [wiki-usage-link]: https://github.com/official-stockfish/Stockfish/wiki/Download-and-usage [worker-link]: https://github.com/official-stockfish/fishtest/wiki/Running-the-worker [lc0-data-link]: 
https://storage.lczero.org/files/training_data [odbl-link]: https://opendatacommons.org/licenses/odbl/odbl-10.txt [build-badge]: https://img.shields.io/github/actions/workflow/status/official-stockfish/Stockfish/stockfish.yml?branch=master&style=for-the-badge&label=stockfish&logo=github [commits-badge]: https://img.shields.io/github/commits-since/official-stockfish/Stockfish/latest?style=for-the-badge [discord-badge]: https://img.shields.io/discord/435943710472011776?style=for-the-badge&label=discord&logo=Discord [fishtest-badge]: https://img.shields.io/website?style=for-the-badge&down_color=red&down_message=Offline&label=Fishtest&up_color=success&up_message=Online&url=https%3A%2F%2Ftests.stockfishchess.org%2Ftests%2Ffinished [license-badge]: https://img.shields.io/github/license/official-stockfish/Stockfish?style=for-the-badge&label=license&color=success [release-badge]: https://img.shields.io/github/v/release/official-stockfish/Stockfish?style=for-the-badge&label=official%20release [website-badge]: https://img.shields.io/website?style=for-the-badge&down_color=red&down_message=Offline&label=website&up_color=success&up_message=Online&url=https%3A%2F%2Fstockfishchess.org ================================================ FILE: Top CPU Contributors.txt ================================================ Contributors to Fishtest with >10,000 CPU hours, as of 2025-12-24. Thank you! 
Username CPU Hours Games played ------------------------------------------------------------------ noobpwnftw 42692720 3385202467 vdv 39922218 1277282126 technologov 26354561 1163905856 linrock 12002255 785641643 olafm 3030005 197722318 mlang 3026000 200065824 okrout 3020471 268364402 pemo 2009761 66178221 TueRens 1956328 83294326 sebastronomy 1806628 73868874 dew 1689162 100033738 grandphish2 1479778 92306101 JojoM 1130646 73666860 rpngn 1081976 65292619 oz 1029329 69522328 gvreuls 844572 59249068 tvijlbrief 796125 51897690 mibere 703840 46867607 leszek 609538 45301765 cw 519602 34988289 fastgm 503862 30260818 robal 503208 32703510 maximmasiutin 500174 30818270 CSU_Dynasty 481663 31916842 ctoks 435431 28551199 crunchy 427414 27371625 bcross 415724 29061187 mgrabiak 380202 27586936 tolkki963 358623 26373242 velislav 342588 22140902 ncfish1 329039 20624527 Fisherman 327231 21829379 Fifis 323909 16200123 Sylvain27 320732 11671388 marrco 310446 19587107 Calis007 310201 18969692 Viren6 297938 5847458 Dantist 296386 18031762 naclosagc 296040 13865010 anematode 293146 3918134 maposora 278093 20454200 javran 271465 20506096 cody 258835 13301710 nordlandia 249322 16420192 Goatminola 218812 21411814 Torom 211061 7238522 glinscott 208125 13277240 drabel 204167 13930674 Wencey 203584 9943614 mhoram 202894 12601997 sschnee 201756 12874780 bking_US 198894 11876016 Mineta 195312 10337614 Thanar 179852 12365359 armo9494 169747 11254404 amicic 161636 11290899 DesolatedDodo 160605 10392474 markkulix 158320 13538874 spams 157128 10319326 sqrt2 147963 9724586 vdbergh 141201 9308647 jcAEie 140086 10603658 CoffeeOne 137100 5024116 malala 136182 8002293 xoto 133759 9159372 Dubslow 130795 8609646 zeryl 129154 7911565 davar 129023 8376525 DMBK 122960 8980062 cuistot 122470 8393996 megaman7de 122254 8066174 dsmith 122059 7570238 Wolfgang 120919 8619168 CypressChess 120902 8683904 sterni1971 113754 6054022 Spprtr 113356 8129809 Data 113305 8220352 BrunoBanani 112960 7436849 skiminki 107583 
7218170 MediumBerry5575 103884 7830022 MaZePallas 102823 6633619 YvesKn 102213 5098076 sunu 100167 7040199 thirdlife 99182 2246960 ElbertoOne 99028 7023771 TechiePirate 98957 1249064 DeepnessFulled 97313 5083358 TataneSan 97257 4239502 romangol 95662 7784954 bigpen0r 94825 6529241 jojo2357 94358 7635486 malfoy 92712 3392874 voidedstarlight 92582 2342038 brabos 92118 6186135 Maxim 90818 3283364 psk 89957 5984901 szupaw 89775 7800606 jromang 87260 5988073 racerschmacer 85805 6122790 Vizvezdenec 83761 5344740 0x3C33 82614 5271253 MarcusTullius 82359 5335665 BRAVONE 81239 5054681 rn 78566 6000852 nssy 76497 5259388 woutboat 76379 6031688 teddybaer 75125 5407666 Pking_cda 73776 5293873 yurikvelo 73611 5046822 Zirie 71260 4602355 Bobo1239 70579 4794999 solarlight 70517 5028306 dv8silencer 70287 3883992 0x539 67147 2918044 manap 66273 4121774 tinker 64333 4268790 CounterFlow 63914 3775062 mecevdimitar 62493 3508750 DanielMiao1 62188 1335664 qurashee 61208 3429862 AGI 58316 4336328 robnjr 57262 4053117 Freja 56938 3733019 MaxKlaxxMiner 56879 3423958 ttruscott 56010 3680085 rkl 55132 4164467 jmdana 54988 4041917 notchris 53936 4184018 renouve 53811 3501516 jibarbosa 53504 5110028 somethingintheshadows 52333 4344808 finfish 51360 3370515 eva42 51272 3599691 eastorwest 51117 3454811 sylvek 50391 3765170 rap 49985 3219146 pb00067 49733 3298934 GPUex 48686 3684998 OuaisBla 48626 3445134 lemtea 48563 1672454 ronaldjerum 47654 3240695 abdicj 46740 2709482 biffhero 46564 3111352 oryx 46422 3607582 VoyagerOne 45476 3452465 rdp65536 43948 2881890 speedycpu 43842 3003273 jbwiebe 43305 2805433 gopeto 43046 2821514 Antihistamine 41788 2761312 mhunt 41735 2691355 WoodMan777 40858 3491196 Epic29 40771 4067404 drauh 40419 1634770 homyur 39893 2850481 gri 39871 2515779 vidar808 39774 1656372 Gaster319 38994 3477702 Garf 37741 2999686 SC 37299 2731694 ZacHFX 36533 2553282 csnodgrass 36207 2688994 icewulf 34935 2421834 strelock 34716 2074055 Jopo12321 33921 2531448 xuhdev 33798 3295210 
csnodgra 33780 1446866 EthanOConnor 33370 2090311 slakovv 32915 2021889 IslandLambda 32667 1659344 Kataiser 32477 2688862 shawnxu 32330 2830036 srowen 32248 1791136 qgluca 31941 2491622 Gelma 31771 1551204 kdave 31157 2198362 manapbk 30987 1810399 votoanthuan 30691 2460856 Prcuvu 30377 2170122 anst 30301 2190091 jkiiski 30136 1904470 spcc 29925 1901692 hyperbolic.tom 29840 2017394 chuckstablers 29659 2093438 Pyafue 29650 1902349 Flopzee 29388 1899905 hoching 29054 2067144 belzedar94 28846 1811530 wizardassassin 28007 2318204 purpletree 27892 2061966 Kyrega 27674 963872 joendter 27193 1781570 Danielv123 27132 1043614 chriswk 26902 1868317 xwziegtm 26897 2124586 spotscene 26877 2139674 achambord 26582 1767323 shreven 26448 1703328 Patrick_G 26276 1801617 yorkman 26193 1992080 ols 26173 1443517 wer 26136 793146 Skiff84 26083 1135002 RudyMars 25980 2211364 Ulysses 25544 1714542 SFTUser 25182 1675689 nabildanial 25068 1531665 Sharaf_DG 24765 1786697 rodneyc 24376 1416402 jsys14 24297 1721230 AndreasKrug 24235 1934711 agg177 23890 1395014 Disservin 23768 1934576 Ente 23752 1678188 JanErik 23408 1703875 Isidor 23388 1680691 Norabor 23371 1603244 Nullvalue 23155 2022752 fishtester 23115 1581502 cisco2015 22920 1763301 Hjax 22561 1566151 gerbil 22435 1679842 Serpensin 22396 1861156 team-oh 22272 1636708 mkstockfishtester 22253 2029566 Roady 22220 1465606 tsim67 22077 1353048 MazeOfGalious 21978 1629593 sg4032 21950 1643373 sev 21791 1983016 ianh2105 21725 1632562 xor12 21628 1680365 dex 21612 1467203 nesoneg 21494 1463031 user213718 21454 1404128 sphinx 21211 1384728 qoo_charly_cai 21136 1514927 jjoshua2 21001 1423089 Zake9298 20938 1565848 horst.prack 20878 1465656 0xB00B1ES 20590 1208666 t3hf1sht3ster 20544 673134 Dinde 20459 1292774 j3corre 20405 941444 Adrian.Schmidt123 20316 1281436 wei 19973 1745989 teenychess 19819 1762006 RickGroszkiewicz 19749 1913986 rstoesser 19569 1293588 eudhan 19274 1283717 nalanzeyu 19211 396674 vulcan 18871 1729392 Karpovbot 18766 1053178 
Farseer 18536 1078326 jundery 18445 1115855 sebv15 18267 1262588 whelanh 17887 347974 ville 17883 1384026 chris 17698 1487385 purplefishies 17595 1092533 dju 17414 981289 iisiraider 17275 1049015 Karby 17177 1030688 fogleman 17134 815562 zhujianzhao 17111 1666972 DragonLord 17014 1162790 pirt 16993 1274363 redstone59 16842 1461780 Alb11747 16787 1213990 Naven94 16414 951718 scuzzi 16155 995347 IgorLeMasson 16064 1147232 micpilar 15866 1399266 ako027ako 15671 1173203 infinigon 15285 965966 fishtrawler 15205 1436165 Nikolay.IT 15154 1068349 Andrew Grant 15114 895539 OssumOpossum 14857 1007129 LunaticBFF57 14525 1190310 YELNAMRON 14480 1141420 enedene 14476 905279 MooTheCow 14459 1023868 BestBoyBerlin 14353 1365584 bpfliegel 14233 882523 mpx86 14019 759568 jpulman 13982 870599 getraideBFF 13871 1172846 crocogoat 13817 1119086 Nesa92 13806 1116101 joster 13717 946960 mbeier 13650 1044928 Pablohn26 13552 1088532 wxt9861 13550 1312306 biniek 13469 930029 Dark_wizzie 13422 1007152 Jackfish 13422 914984 Hongildong 13297 699288 Rudolphous 13244 883140 Phoenix17 13032 1124066 Machariel 13010 863104 mabichito 12903 749391 FormazChar 12899 980413 thijsk 12886 722107 AdrianSA 12860 804972 szczur90 12720 979324 mschmidt 12644 863193 korposzczur 12606 838168 fatmurphy 12547 853210 Oakwen 12537 856257 SapphireBrand 12416 969604 Snuuka 12392 509082 deflectooor 12386 579392 modolief 12386 896470 ckaz 12273 754644 pgontarz 12151 848794 dbernier 12103 860824 rensonthemove 11999 971993 stocky 11954 699440 ali-al-zhrani 11887 836126 3cho 11842 1036786 Craftyawesome 11736 832254 dragon123118 11578 1044142 ImperiumAeternum 11482 979142 lvdv 11475 594400 infinity 11470 727027 kusihe 11468 468450 vaskoul 11446 976902 aga 11412 695127 Def9Infinity 11408 700682 torbjo 11395 729145 Thomas A. 
Anderson 11372 732094 savage84 11358 670860 d64 11263 789184 Poly 11172 455568 enizor 11140 630194 snicolet 11106 869170 dapper 11032 771402 Ethnikoi 10993 945906 Karmatron 10871 678306 zarthus 10773 1034536 OliverClarke 10696 942654 Omed 10680 669816 cyberthink 10647 936538 basepi 10637 744851 michaelrpg 10624 748179 Cubox 10621 826448 GBx3TV 10499 343266 Styx 10450 867836 OIVAS7572 10420 995586 Garruk 10365 706465 dzjp 10343 732529 Lorenz 10311 886308 borinot 10026 902130 ================================================ FILE: scripts/.gitattributes ================================================ *.sh text eol=lf ================================================ FILE: scripts/get_native_properties.sh ================================================ #!/bin/sh # # Returns the best architecture supported by the CPU (as expected by src/Makefile ARCH=). # # Output format: # "\n" # # --------------------------- # Helpers (POSIX) # --------------------------- # Test hooks (optional env overrides) # GP_UNAME_S: override `uname -s` # GP_UNAME_M: override `uname -m` # GP_CPUINFO: path to a cpuinfo-like fixture file (defaults to /proc/cpuinfo) # GP_BITS: override getconf LONG_BIT result (32/64) # GP_SYSCTL_FEATURES: override sysctl feature strings on Darwin x86_64 cpuinfo_path=${GP_CPUINFO:-/proc/cpuinfo} # Normalize to a single-line, space-separated string. normalize_ws() { printf '%s\n' "$*" | tr '\n\t' ' ' | tr -s ' ' } die() { printf '%s\n' "$*" >&2 exit 1 } # Populate $flags from /proc/cpuinfo when available, # removing underscores and dots to reduce naming variations. get_flags() { if [ -r "$cpuinfo_path" ]; then flags=$( awk ' /^flags[ \t]*:|^Features[ \t]*:/ { if (!found) { gsub(/^flags[ \t]*:[ \t]*|^Features[ \t]*:[ \t]*|[_.]/, ""); line=$0 found=1 } } END { print line } ' "$cpuinfo_path" 2>/dev/null ) else flags='' fi flags=$(printf '%s\n' "$flags" | tr '[:upper:]' '[:lower:]') flags=$(normalize_ws "$flags") } # Populate $flags from sysctl on Darwin x86_64. 
get_sysctl_flags() {
  # A non-empty GP_SYSCTL_FEATURES replaces the live sysctl query.
  # Whichever source is used, the text is flattened onto one line,
  # lowercased, and stripped of '.' and '_' before normalize_ws
  # collapses the remaining whitespace.
  flags=$(
    {
      if [ -n "${GP_SYSCTL_FEATURES:-}" ]; then
        printf '%s\n' "$GP_SYSCTL_FEATURES"
      else
        sysctl -n machdep.cpu.features machdep.cpu.leaf7_features 2>/dev/null
      fi
    } | tr '\n' ' ' | tr '[:upper:]' '[:lower:]' | tr -d '._'
  )
  flags=$(normalize_ws "$flags")
}

# Set $bits to the word size used when only a generic arch can be chosen.
# GP_BITS takes precedence when non-empty; otherwise getconf LONG_BIT is
# asked. Any value other than 32 or 64 (including an empty result from a
# failed lookup) is replaced by 64.
get_bits() {
  bits=${GP_BITS:-}
  if [ -z "$bits" ]; then
    bits=$(getconf LONG_BIT 2>/dev/null)
  fi
  case $bits in
    32|64) return 0 ;;
  esac
  bits=64
}

# Print the numeric ARM architecture level read from the cpuinfo file.
# Returns 1 when $cpuinfo_path is not readable. When the file is readable
# but no matching line carries a number, nothing is printed.
get_arm_arch_level() {
  if [ ! -r "$cpuinfo_path" ]; then
    return 1
  fi
  awk '
    /^CPU architecture[ \t]*:/ {
      value = $0
      sub(/^[^:]*:[ \t]*/, "", value)
      if (match(value, /[0-9]+/)) {
        print substr(value, RSTART, RLENGTH)
        exit
      }
    }
    /^Processor[ \t]*:/ {
      value = $0
      sub(/^[^:]*:[ \t]*/, "", value)
      if (match(value, /ARMv[0-9]+/)) {
        # Drop the 4-character "ARMv" prefix so only the digits remain.
        print substr(value, RSTART + 4, RLENGTH - 4)
        exit
      }
    }
  ' "$cpuinfo_path" 2>/dev/null
}

# Print the ARM architecture level, preferring what the cpuinfo file says.
# $1 is a machine name (the caller passes uname -m); it is consulted only
# when cpuinfo yields nothing, and only the armv5*/armv6*/armv7*/armv8l
# spellings are recognised. Returns 1 when no level can be determined.
get_arm_level() {
  arm_level=$(get_arm_arch_level || :)
  if [ -z "$arm_level" ]; then
    case ${1:-} in
      armv5*) printf '5\n' ;;
      armv6*) printf '6\n' ;;
      armv7*) printf '7\n' ;;
      armv8l) printf '8\n' ;;
      *) return 1 ;;
    esac
    return
  fi
  printf '%s\n' "$arm_level"
  return 0
}

# Succeed when $1 occurs in $flags as a complete space-delimited word.
# Padding both sides with a space prevents substring hits.
has_flag() {
  case " $flags " in
    *" $1 "*) return 0 ;;
  esac
  return 1
}

# Succeed only when every argument is present in $flags.
# With no arguments this succeeds.
match_flags() {
  for f in "$@"; do
    if ! has_flag "$f"; then
      return 1
    fi
  done
  return 0
}

# Succeed as soon as one argument is present in $flags.
# With no arguments this fails.
match_any_flags() {
  for f in "$@"; do
    if has_flag "$f"; then
      return 0
    fi
  done
  return 1
}

# /proc/cpuinfo commonly reports SSE3 under the name "pni", so accept either.
match_sse3() {
  match_any_flags sse3 pni
}

# AMD Zen1/2 exclusion logic (used for bmi2 tier).
# https://web.archive.org/web/20250821132355/https://en.wikichip.org/wiki/amd/cpuid is_znver_1_2() ( [ -r "$cpuinfo_path" ] || exit 1 vendor_id=$(awk '/^vendor_id/{print $3; exit}' "$cpuinfo_path" 2>/dev/null) cpu_family=$(awk '/^cpu family/{print $4; exit}' "$cpuinfo_path" 2>/dev/null) [ "$vendor_id" = "AuthenticAMD" ] && [ "$cpu_family" = "23" ] ) match_not_znver12_and_flags() { is_znver_1_2 && return 1 match_flags "$@" } match_sse3_popcnt() { has_flag popcnt || return 1 match_sse3 } match_true() { return 0; } # Generic selector: reads lines like "arch|predicate|arg1 arg2 ..." # First match wins; blank lines and lines starting with '#' are ignored. select_arch_from_table() { while IFS='|' read -r arch pred args; do [ -z "$arch" ] && continue case $arch in \#*) continue ;; esac if [ -n "$args" ]; then # Intentional splitting of args into words for the predicate. # shellcheck disable=SC2086 $pred $args && { printf '%s\n' "$arch"; return 0; } else $pred && { printf '%s\n' "$arch"; return 0; } fi done return 1 } # --------------------------- # Arch selection (table-driven) # --------------------------- set_arch_loongarch64() { true_arch=$( select_arch_from_table <<'EOF' loongarch64-lasx|match_flags|lasx loongarch64-lsx|match_flags|lsx loongarch64|match_true| EOF ) } set_arch_x86_64() { true_arch=$( select_arch_from_table <<'EOF' # Strongest -> weakest (first match wins) x86-64-avx512icl|match_flags|avx512f avx512cd avx512vl avx512dq avx512bw avx512ifma avx512vbmi avx512vbmi2 avx512vpopcntdq avx512bitalg avx512vnni vpclmulqdq gfni vaes x86-64-vnni512|match_flags|avx512vnni avx512dq avx512f avx512bw avx512vl x86-64-avx512|match_flags|avx512f avx512bw x86-64-avxvnni|match_flags|avxvnni x86-64-bmi2|match_not_znver12_and_flags|bmi2 x86-64-avx2|match_flags|avx2 x86-64-sse41-popcnt|match_flags|sse41 popcnt x86-64-ssse3|match_flags|ssse3 x86-64-sse3-popcnt|match_sse3_popcnt| x86-64|match_true| EOF ) } set_arch_x86_32() { true_arch=$( select_arch_from_table <<'EOF' 
x86-32-sse41-popcnt|match_flags|sse41 popcnt x86-32-sse2|match_flags|sse2 x86-32|match_true| EOF ) } # PPC64 needs a little parsing to distinguish vsx vs altivec. set_arch_ppc_64() { if [ -r "$cpuinfo_path" ] && grep -q "altivec" "$cpuinfo_path" 2>/dev/null; then # Typical: "cpu : POWER8E" (extract the number after POWER) power=$( awk -F: '/^cpu[ \t]*:/{print $2; exit}' "$cpuinfo_path" 2>/dev/null \ | sed -n 's/.*[Pp][Oo][Ww][Ee][Rr][^0-9]*\([0-9][0-9]*\).*/\1/p' ) if [ -z "$power" ]; then power=$( awk -F: '/^cpu[ \t]*:/{print $2; exit}' "$cpuinfo_path" 2>/dev/null \ | sed -n 's/.*\([0-9][0-9]*\).*/\1/p' ) fi case $power in ''|*[!0-9]*) true_arch='ppc-64-altivec' ;; *) if [ "$power" -gt 7 ] 2>/dev/null; then true_arch='ppc-64-vsx' else true_arch='ppc-64-altivec' fi ;; esac else true_arch='ppc-64' fi } # --------------------------- # OS / machine dispatch # --------------------------- uname_s=$(uname -s 2>/dev/null) uname_m=$(uname -m 2>/dev/null) uname_s=${GP_UNAME_S:-$uname_s} uname_m=${GP_UNAME_M:-$uname_m} case $uname_s in Darwin) case $uname_m in arm64) true_arch='apple-silicon' ;; x86_64) get_sysctl_flags set_arch_x86_64 ;; *) get_bits if [ "$bits" = "32" ]; then true_arch='general-32' else true_arch='general-64' fi ;; esac ;; Linux) get_flags case $uname_m in x86_64) set_arch_x86_64 ;; i?86) set_arch_x86_32 ;; ppc64*) set_arch_ppc_64 ;; aarch64|arm64) true_arch='armv8' if match_flags asimddp; then true_arch='armv8-dotprod' fi ;; armv5*|armv6*|armv7*|armv8l|arm*) arm_level=$(get_arm_level "$uname_m" || :) case $arm_level in 5|6) true_arch='general-32' ;; 7|8) true_arch='armv7' if match_flags neon; then true_arch='armv7-neon' fi ;; *) true_arch='general-32' if match_flags neon; then true_arch='armv7-neon' fi ;; esac ;; loongarch64*) set_arch_loongarch64 ;; riscv64) true_arch='riscv64' ;; e2k*) true_arch='e2k' ;; ppc|ppc32|powerpc) true_arch='ppc-32' ;; *) # Don't hard-fail: fall back to general-* so ARCH=native still builds get_bits if [ "$bits" = "32" ]; then 
true_arch='general-32' else true_arch='general-64' fi ;; esac ;; MINGW*ARM64*) # Windows ARM64 (MSYS2/MinGW) # Can't reliably detect ARM CPU features here true_arch='armv8-dotprod' ;; CYGWIN*|MINGW*|MSYS*) # Windows x86_64 (MSYS2/Cygwin/MinGW) get_flags set_arch_x86_64 ;; *) die "Unsupported system type: $uname_s" ;; esac printf '%s\n' "$true_arch" ================================================ FILE: scripts/net.sh ================================================ #!/bin/sh # download commands with a 5min time-out to ensure things fail if the server stalls wget_or_curl=$( (command -v wget >/dev/null 2>&1 && echo "wget -qO- --timeout=300 --tries=1") || (command -v curl >/dev/null 2>&1 && echo "curl -skL --max-time 300")) sha256sum=$( (command -v shasum >/dev/null 2>&1 && echo "shasum -a 256") || (command -v sha256sum >/dev/null 2>&1 && echo "sha256sum")) if [ -z "$sha256sum" ]; then >&2 echo "sha256sum not found, NNUE files will be assumed valid." fi get_nnue_filename() { grep "$1" evaluate.h | grep "#define" | sed "s/.*\(nn-[a-z0-9]\{12\}.nnue\).*/\1/" } validate_network() { # If no sha256sum command is available, assume the file is always valid. if [ -n "$sha256sum" ] && [ -f "$1" ]; then if [ "$1" != "nn-$($sha256sum "$1" | cut -c 1-12).nnue" ]; then rm -f "$1" return 1 fi fi } fetch_network() { _filename="$(get_nnue_filename "$1")" if [ -z "$_filename" ]; then >&2 echo "NNUE file name not found for: $1" return 1 fi if [ -f "$_filename" ]; then if validate_network "$_filename"; then echo "Existing $_filename validated, skipping download" return else echo "Removing invalid NNUE file: $_filename" fi fi if [ -z "$wget_or_curl" ]; then >&2 printf "%s\n" "Neither wget or curl is installed." \ "Install one of these tools to download NNUE files automatically." exit 1 fi for url in \ "https://tests.stockfishchess.org/api/nn/$_filename" \ "https://github.com/official-stockfish/networks/raw/master/$_filename"; do echo "Downloading from $url ..." 
if $wget_or_curl "$url" >"$_filename"; then if validate_network "$_filename"; then echo "Successfully validated $_filename" else rm -f $_filename echo "Downloaded $_filename is invalid, and has been removed." continue fi else rm -f $_filename echo "Failed to download from $url" fi if [ -f "$_filename" ]; then return fi done # Download was not successful in the loop, return false. >&2 echo "Failed to download $_filename" return 1 } fetch_network EvalFileDefaultNameBig && fetch_network EvalFileDefaultNameSmall ================================================ FILE: src/Makefile ================================================ # Stockfish, a UCI chess playing engine derived from Glaurung 2.1 # Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) # # Stockfish is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # Stockfish is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . ### ========================================================================== ### Section 1. General Configuration ### ========================================================================== ### Establish the operating system name KERNEL := $(shell uname -s) ifeq ($(KERNEL),Linux) OS := $(shell uname -o) endif ### Command prefix to run the built executable (e.g. 
wine, sde, qemu) ### Backward compatible alias: WINE_PATH (deprecated) ifneq ($(strip $(WINE_PATH)),) ifeq ($(strip $(RUN_PREFIX)),) RUN_PREFIX := $(WINE_PATH) endif ifeq ($(MAKELEVEL),0) ifneq ($(strip $(RUN_PREFIX)),$(strip $(WINE_PATH))) $(warning *** Both RUN_PREFIX and WINE_PATH are set; ignoring WINE_PATH. ***) else $(warning *** WINE_PATH is deprecated; use RUN_PREFIX instead. ***) endif endif endif ### Target Windows OS ifeq ($(OS),Windows_NT) ifneq ($(COMP),ndk) target_windows = yes endif else ifeq ($(COMP),mingw) target_windows = yes ifeq ($(RUN_PREFIX),) RUN_PREFIX := $(shell which wine) endif endif ### Executable name ifeq ($(target_windows),yes) EXE = stockfish.exe else EXE = stockfish endif ### Installation dir definitions PREFIX = /usr/local BINDIR = $(PREFIX)/bin ### Built-in benchmark for pgo-builds PGOBENCH = $(RUN_PREFIX) ./$(EXE) bench ### Source and object files SRCS = benchmark.cpp bitboard.cpp evaluate.cpp main.cpp \ misc.cpp movegen.cpp movepick.cpp position.cpp \ search.cpp thread.cpp timeman.cpp tt.cpp uci.cpp ucioption.cpp tune.cpp syzygy/tbprobe.cpp \ nnue/nnue_accumulator.cpp nnue/nnue_misc.cpp nnue/network.cpp \ nnue/features/half_ka_v2_hm.cpp nnue/features/full_threats.cpp \ engine.cpp score.cpp memory.cpp HEADERS = benchmark.h bitboard.h evaluate.h misc.h movegen.h movepick.h history.h \ nnue/nnue_misc.h nnue/features/half_ka_v2_hm.h nnue/features/full_threats.h \ nnue/layers/affine_transform.h nnue/layers/affine_transform_sparse_input.h \ nnue/layers/clipped_relu.h nnue/layers/sqr_clipped_relu.h nnue/nnue_accumulator.h \ nnue/nnue_architecture.h nnue/nnue_common.h nnue/nnue_feature_transformer.h nnue/simd.h \ position.h search.h syzygy/tbprobe.h thread.h thread_win32_osx.h timeman.h \ tt.h tune.h types.h uci.h ucioption.h perft.h nnue/network.h engine.h score.h numa.h memory.h shm.h shm_linux.h OBJS = $(notdir $(SRCS:.cpp=.o)) VPATH = syzygy:nnue:nnue/features ### 
========================================================================== ### Section 2. High-level Configuration ### ========================================================================== # # flag --- Comp switch --- Description # ---------------------------------------------------------------------------- # # debug = yes/no --- -DNDEBUG --- Enable/Disable debug mode # sanitize = none/ ... (-fsanitize ) # --- ( undefined ) --- enable undefined behavior checks # --- ( thread ) --- enable threading error checks # --- ( address ) --- enable memory access checks # --- ...etc... --- see compiler documentation for supported sanitizers # optimize = yes/no --- (-O3/-fast etc.) --- Enable/Disable optimizations # arch = (name) --- (-arch) --- Target architecture # bits = 64/32 --- -DIS_64BIT --- 64-/32-bit operating system # prefetch = yes/no --- -DUSE_PREFETCH --- Use prefetch asm-instruction # popcnt = yes/no --- -DUSE_POPCNT --- Use popcnt asm-instruction # pext = yes/no --- -DUSE_PEXT --- Use pext x86_64 asm-instruction # sse = yes/no --- -msse --- Use Intel Streaming SIMD Extensions # mmx = yes/no --- -mmmx --- Use Intel MMX instructions # sse2 = yes/no --- -msse2 --- Use Intel Streaming SIMD Extensions 2 # ssse3 = yes/no --- -mssse3 --- Use Intel Supplemental Streaming SIMD Extensions 3 # sse41 = yes/no --- -msse4.1 --- Use Intel Streaming SIMD Extensions 4.1 # avx2 = yes/no --- -mavx2 --- Use Intel Advanced Vector Extensions 2 # avxvnni = yes/no --- -mavxvnni --- Use Intel Vector Neural Network Instructions AVX # avx512 = yes/no --- -mavx512bw --- Use Intel Advanced Vector Extensions 512 # vnni512 = yes/no --- -mavx512vnni --- Use Intel Vector Neural Network Instructions 512 # avx512icl = yes/no --- ... multiple ... 
--- Use All AVX-512 features available on both Intel Ice Lake and AMD Zen 4 # altivec = yes/no --- -maltivec --- Use PowerPC Altivec SIMD extension # vsx = yes/no --- -mvsx --- Use POWER VSX SIMD extension # neon = yes/no --- -DUSE_NEON --- Use ARM SIMD architecture # dotprod = yes/no --- -DUSE_NEON_DOTPROD --- Use ARM advanced SIMD Int8 dot product instructions # lsx = yes/no --- -mlsx --- Use Loongson SIMD eXtension # lasx = yes/no --- -mlasx --- use Loongson Advanced SIMD eXtension # # Note that Makefile is space sensitive, so when adding new architectures # or modifying existing flags, you have to make sure there are no extra spaces # at the end of the line for flag values. # # Example of use for these flags: # make build ARCH=x86-64-avx512 debug=yes sanitize="address undefined" ### 2.1. General and architecture defaults ifeq ($(ARCH),) ARCH = native endif ifeq ($(ARCH), native) override ARCH := $(shell $(SHELL) ../scripts/get_native_properties.sh | cut -d " " -f 1) endif # explicitly check for the list of supported architectures (as listed with make help), # the user can override with `make ARCH=x86-64-avx512icl SUPPORTED_ARCH=true` ifeq ($(ARCH), $(filter $(ARCH), \ x86-64-avx512icl x86-64-vnni512 x86-64-avx512 x86-64-avxvnni \ x86-64-bmi2 x86-64-avx2 x86-64-sse41-popcnt x86-64-modern x86-64-ssse3 x86-64-sse3-popcnt \ x86-64 x86-32-sse41-popcnt x86-32-sse2 x86-32 ppc-64 ppc-64-altivec ppc-64-vsx ppc-32 e2k \ armv7 armv7-neon armv8 armv8-dotprod apple-silicon general-64 general-32 riscv64 \ loongarch64 loongarch64-lsx loongarch64-lasx)) SUPPORTED_ARCH=true else SUPPORTED_ARCH=false endif optimize = yes debug = no sanitize = none bits = 64 prefetch = no popcnt = no pext = no sse = no mmx = no sse2 = no ssse3 = no sse41 = no avx2 = no avxvnni = no avx512 = no vnni512 = no avx512icl = no altivec = no vsx = no neon = no dotprod = no arm_version = 0 lsx = no lasx = no STRIP = strip ifneq ($(shell which clang-format-20 2> /dev/null),) CLANG-FORMAT = clang-format-20 
else CLANG-FORMAT = clang-format endif ### 2.2 Architecture specific ifeq ($(findstring x86,$(ARCH)),x86) # x86-32/64 ifeq ($(findstring x86-32,$(ARCH)),x86-32) arch = i386 bits = 32 sse = no mmx = yes else arch = x86_64 sse = yes sse2 = yes endif ifeq ($(findstring -sse,$(ARCH)),-sse) sse = yes endif ifeq ($(findstring -popcnt,$(ARCH)),-popcnt) popcnt = yes endif ifeq ($(findstring -mmx,$(ARCH)),-mmx) mmx = yes endif ifeq ($(findstring -sse2,$(ARCH)),-sse2) sse = yes sse2 = yes endif ifeq ($(findstring -ssse3,$(ARCH)),-ssse3) sse = yes sse2 = yes ssse3 = yes endif ifeq ($(findstring -sse41,$(ARCH)),-sse41) sse = yes sse2 = yes ssse3 = yes sse41 = yes endif ifeq ($(findstring -modern,$(ARCH)),-modern) $(warning *** ARCH=$(ARCH) is deprecated, defaulting to ARCH=x86-64-sse41-popcnt. Execute `make help` for a list of available architectures. ***) $(shell sleep 5) popcnt = yes sse = yes sse2 = yes ssse3 = yes sse41 = yes endif ifeq ($(findstring -avx2,$(ARCH)),-avx2) popcnt = yes sse = yes sse2 = yes ssse3 = yes sse41 = yes avx2 = yes endif ifeq ($(findstring -avxvnni,$(ARCH)),-avxvnni) popcnt = yes sse = yes sse2 = yes ssse3 = yes sse41 = yes avx2 = yes avxvnni = yes pext = yes endif ifeq ($(findstring -bmi2,$(ARCH)),-bmi2) popcnt = yes sse = yes sse2 = yes ssse3 = yes sse41 = yes avx2 = yes pext = yes endif ifeq ($(findstring -avx512,$(ARCH)),-avx512) popcnt = yes sse = yes sse2 = yes ssse3 = yes sse41 = yes avx2 = yes pext = yes avx512 = yes endif ifeq ($(findstring -vnni512,$(ARCH)),-vnni512) popcnt = yes sse = yes sse2 = yes ssse3 = yes sse41 = yes avx2 = yes pext = yes avx512 = yes vnni512 = yes endif ifeq ($(findstring -avx512icl,$(ARCH)),-avx512icl) popcnt = yes sse = yes sse2 = yes ssse3 = yes sse41 = yes avx2 = yes pext = yes avx512 = yes vnni512 = yes avx512icl = yes endif ifeq ($(sse),yes) prefetch = yes endif # 64-bit pext is not available on x86-32 ifeq ($(bits),32) pext = no endif else # all other architectures ifeq ($(ARCH),general-32) arch = any bits 
= 32 endif ifeq ($(ARCH),general-64) arch = any endif ifeq ($(ARCH),armv7) arch = armv7 prefetch = yes bits = 32 arm_version = 7 endif ifeq ($(ARCH),armv7-neon) arch = armv7 prefetch = yes popcnt = yes neon = yes bits = 32 arm_version = 7 endif ifeq ($(ARCH),armv8) arch = armv8 prefetch = yes popcnt = yes neon = yes arm_version = 8 endif ifeq ($(ARCH),armv8-dotprod) arch = armv8 prefetch = yes popcnt = yes neon = yes dotprod = yes arm_version = 8 endif ifeq ($(ARCH),apple-silicon) arch = arm64 prefetch = yes popcnt = yes neon = yes dotprod = yes arm_version = 8 endif ifeq ($(ARCH),ppc-32) arch = ppc bits = 32 endif ifeq ($(ARCH),ppc-64) arch = ppc64 popcnt = yes prefetch = yes endif ifeq ($(ARCH),ppc-64-altivec) arch = ppc64 popcnt = yes prefetch = yes altivec = yes endif ifeq ($(ARCH),ppc-64-vsx) arch = ppc64 popcnt = yes prefetch = yes vsx = yes endif ifeq ($(findstring e2k,$(ARCH)),e2k) arch = e2k mmx = yes bits = 64 sse = yes sse2 = yes ssse3 = yes sse41 = yes popcnt = yes endif ifeq ($(ARCH),riscv64) arch = riscv64 endif ifeq ($(findstring loongarch64,$(ARCH)),loongarch64) arch = loongarch64 prefetch = yes ifeq ($(findstring -lasx,$(ARCH)),-lasx) lsx = yes lasx = yes endif ifeq ($(findstring -lsx,$(ARCH)),-lsx) lsx = yes endif endif endif ### ========================================================================== ### Section 3. 
Low-level Configuration
### ==========================================================================

### 3.1 Selecting compiler (default = gcc)

# Capture the flags supplied by the caller once, at the outermost make
# invocation (MAKELEVEL 0), and export them. The flag variables below are
# always rebuilt from these ENV_* snapshots.
ifeq ($(MAKELEVEL),0)
    export ENV_CXXFLAGS := $(CXXFLAGS)
    export ENV_DEPENDFLAGS := $(DEPENDFLAGS)
    export ENV_LDFLAGS := $(LDFLAGS)
endif

# Base flags; EXTRACXXFLAGS / EXTRALDFLAGS are appended as given by the caller.
CXXFLAGS = $(ENV_CXXFLAGS) -Wall -Wcast-qual -fno-exceptions -std=c++17 $(EXTRACXXFLAGS)
DEPENDFLAGS = $(ENV_DEPENDFLAGS) -std=c++17
LDFLAGS = $(ENV_LDFLAGS) $(EXTRALDFLAGS)

ifeq ($(COMP),)
    COMP=gcc
endif

# COMP is the user-facing compiler choice; comp (lower case) is the compiler
# family that the later sections key their flag selection on.
ifeq ($(COMP),gcc)
    comp=gcc
    CXX=g++
    CXXFLAGS += -pedantic -Wextra -Wshadow -Wmissing-declarations

    # -m$(bits) is only passed for the ARM / RISC-V families when building
    # for Android; every other family except loongarch64 always gets it.
    ifeq ($(arch),$(filter $(arch),armv7 armv8 riscv64))
        ifeq ($(OS),Android)
            CXXFLAGS += -m$(bits)
            LDFLAGS += -m$(bits)
        endif
        ifeq ($(ARCH),riscv64)
            CXXFLAGS += -latomic
        endif
    else ifeq ($(arch),loongarch64)
        CXXFLAGS += -latomic
    else
        CXXFLAGS += -m$(bits)
        LDFLAGS += -m$(bits)
    endif

    ifeq ($(arch),$(filter $(arch),armv7))
        LDFLAGS += -latomic
    endif

    ifneq ($(KERNEL),Darwin)
        LDFLAGS += -Wl,--no-as-needed
    endif
endif

ifeq ($(target_windows),yes)
    LDFLAGS += -static
endif

# MinGW cross compiler: prefer the "-posix" flavour of the driver when it is
# found on PATH, otherwise fall back to the plain driver name.
ifeq ($(COMP),mingw)
    comp=mingw

    ifeq ($(bits),64)
        ifeq ($(shell which x86_64-w64-mingw32-c++-posix 2> /dev/null),)
            CXX=x86_64-w64-mingw32-c++
        else
            CXX=x86_64-w64-mingw32-c++-posix
        endif
    else
        ifeq ($(shell which i686-w64-mingw32-c++-posix 2> /dev/null),)
            CXX=i686-w64-mingw32-c++
        else
            CXX=i686-w64-mingw32-c++-posix
        endif
    endif
    CXXFLAGS += -pedantic -Wextra -Wshadow -Wmissing-declarations
endif

ifeq ($(COMP),icx)
    comp=icx
    CXX=icpx
    CXXFLAGS += --intel -pedantic -Wextra -Wshadow -Wmissing-prototypes \
        -Wconditional-uninitialized -Wabi -Wdeprecated
endif

ifeq ($(COMP),clang)
    comp=clang
    CXX=clang++
    ifeq ($(target_windows),yes)
        CXX=x86_64-w64-mingw32-clang++
    endif

    CXXFLAGS += -pedantic -Wextra -Wshadow -Wmissing-prototypes \
        -Wconditional-uninitialized -flax-vector-conversions=none

    # Link libatomic unless the kernel is Darwin/OpenBSD/FreeBSD, the target
    # is Windows, or RTLIB is compiler-rt.
    ifeq ($(filter $(KERNEL),Darwin OpenBSD FreeBSD),)
    ifeq ($(target_windows),)
    ifneq ($(RTLIB),compiler-rt)
        LDFLAGS += -latomic
    endif
    endif
    endif

    # Same -m$(bits) policy as in the gcc branch above.
    ifeq ($(arch),$(filter $(arch),armv7 armv8 riscv64))
        ifeq ($(OS),Android)
            CXXFLAGS += -m$(bits)
            LDFLAGS += -m$(bits)
        endif
        ifeq ($(ARCH),riscv64)
            CXXFLAGS += -latomic
        endif
    else ifeq ($(arch),loongarch64)
        CXXFLAGS += -latomic
    else
        CXXFLAGS += -m$(bits)
        LDFLAGS += -m$(bits)
    endif
endif

# macOS: fix the minimum deployment target and pass -arch for every arch
# family other than "any". XCRUN is used as a command prefix by the
# clang/icx profile-use targets.
ifeq ($(KERNEL),Darwin)
    CXXFLAGS += -mmacosx-version-min=10.15
    LDFLAGS += -mmacosx-version-min=10.15
    ifneq ($(arch),any)
        CXXFLAGS += -arch $(arch)
        LDFLAGS += -arch $(arch)
    endif
    XCRUN = xcrun
endif

# To cross-compile for Android, use NDK version r27c or later.
# The NDK toolchain is treated as the clang family (comp=clang). STRIP uses
# the target-prefixed strip tool when it is on PATH and llvm-strip otherwise.
ifeq ($(COMP),ndk)
    CXXFLAGS += -stdlib=libc++
    comp=clang
    ifeq ($(arch),armv7)
        CXX=armv7a-linux-androideabi29-clang++
        CXXFLAGS += -mthumb -march=armv7-a -mfloat-abi=softfp -mfpu=neon
        ifneq ($(shell which arm-linux-androideabi-strip 2>/dev/null),)
            STRIP=arm-linux-androideabi-strip
        else
            STRIP=llvm-strip
        endif
    endif
    ifeq ($(arch),armv8)
        CXX=aarch64-linux-android29-clang++
        ifneq ($(shell which aarch64-linux-android-strip 2>/dev/null),)
            STRIP=aarch64-linux-android-strip
        else
            STRIP=llvm-strip
        endif
    endif
    ifeq ($(arch),x86_64)
        CXX=x86_64-linux-android29-clang++
        ifneq ($(shell which x86_64-linux-android-strip 2>/dev/null),)
            STRIP=x86_64-linux-android-strip
        else
            STRIP=llvm-strip
        endif
    endif
    LDFLAGS += -static-libstdc++
endif

### Allow overwriting CXX from command line
ifdef COMPCXX
    CXX=$(COMPCXX)
endif

# llvm-profdata must be version compatible with the specified CXX (be it clang, or the gcc alias)
# make -j profile-build CXX=clang++-20 COMP=clang

# Locate the version in the same directory as the compiler used,
# with fallback to a generic one if it can't be located
LLVM_PROFDATA := $(dir $(realpath $(shell which $(CXX) 2> /dev/null)))llvm-profdata
# for icx
ifeq ($(wildcard $(LLVM_PROFDATA)),)
    LLVM_PROFDATA := $(dir $(realpath $(shell which $(CXX) 2> /dev/null)))/compiler/llvm-profdata
endif
ifeq ($(wildcard $(LLVM_PROFDATA)),)
    LLVM_PROFDATA := llvm-profdata
endif

# Pick the pair of private targets that profile-build uses for the
# instrumented build (profile_make) and the optimized build (profile_use).
ifeq ($(comp),icx)
    profile_make = icx-profile-make
    profile_use = icx-profile-use
else ifeq ($(comp),clang)
    profile_make = clang-profile-make
    profile_use = clang-profile-use
else
    profile_make = gcc-profile-make
    profile_use = gcc-profile-use
    ifeq ($(KERNEL),Darwin)
        EXTRAPROFILEFLAGS = -fvisibility=hidden
    endif
endif

### Sometimes gcc is really clang
# gccisclang is non-empty when the output of `$(CXX) --version` contains
# "clang"; it is consulted again by the LTO section.
ifeq ($(COMP),gcc)
    gccversion := $(shell $(CXX) --version 2>/dev/null)
    gccisclang := $(findstring clang,$(gccversion))
    ifneq ($(gccisclang),)
        profile_make = clang-profile-make
        profile_use = clang-profile-use
    else
        CXXFLAGS += -Wstack-usage=128000
    endif
endif

### On mingw use Windows threads, otherwise POSIX
ifneq ($(comp),mingw)
    CXXFLAGS += -DUSE_PTHREADS
    # On Android Bionic's C library comes with its own pthread implementation bundled in
    ifneq ($(OS),Android)
        # Haiku has pthreads in its libroot, so only link it in on other platforms
        ifneq ($(KERNEL),Haiku)
            ifneq ($(COMP),ndk)
                LDFLAGS += -lpthread

                # -lrt is added unless targeting Windows or Darwin.
                add_lrt = yes
                ifeq ($(target_windows),yes)
                    add_lrt = no
                endif

                ifeq ($(KERNEL),Darwin)
                    add_lrt = no
                endif

                ifeq ($(add_lrt),yes)
                    LDFLAGS += -lrt
                endif
            endif
        endif
    endif
endif

### 3.2.1 Debugging
ifeq ($(debug),no)
    CXXFLAGS += -DNDEBUG
else
    CXXFLAGS += -g
    CXXFLAGS += -D_GLIBCXX_ASSERTIONS -D_GLIBCXX_DEBUG
endif

### 3.2.2 Debugging with undefined behavior sanitizers
# $(sanitize) may hold several names; each one becomes its own -fsanitize=
# option on both the compile and the link line.
ifneq ($(sanitize),none)
    CXXFLAGS += -g3 $(addprefix -fsanitize=,$(sanitize))
    LDFLAGS += $(addprefix -fsanitize=,$(sanitize))
endif

### 3.3 Optimization
ifeq ($(optimize),yes)

    CXXFLAGS += -O3 -funroll-loops

    ifeq ($(comp),gcc)
        ifeq ($(OS), Android)
            CXXFLAGS += -fno-gcse -mthumb -march=armv7-a -mfloat-abi=softfp
        endif
    endif

    # On Darwin, -mdynamic-no-pic is used for clang/icx, and for gcc on every
    # arch family except arm64.
    ifeq ($(KERNEL),Darwin)
        ifeq ($(comp),$(filter $(comp),clang icx))
            CXXFLAGS += -mdynamic-no-pic
        endif

        ifeq ($(comp),gcc)
            ifneq ($(arch),arm64)
                CXXFLAGS += -mdynamic-no-pic
            endif
        endif
    endif

    # Only clang major versions below 16 get -fexperimental-new-pass-manager.
    ifeq ($(comp),clang)
        clangmajorversion := $(shell $(CXX) -dumpversion 2>/dev/null | cut -f1 -d.)
        ifeq ($(shell expr $(clangmajorversion) \< 16),1)
            CXXFLAGS += -fexperimental-new-pass-manager
        endif
    endif
endif

### 3.4 Bits
ifeq ($(bits),64)
    CXXFLAGS += -DIS_64BIT
endif

### 3.5 prefetch and popcount
ifeq ($(prefetch),yes)
    ifeq ($(sse),yes)
        CXXFLAGS += -msse
    endif
else
    CXXFLAGS += -DNO_PREFETCH
endif

# The listed arch families only get the USE_POPCNT define; all others also
# get the x86 -msse3 -mpopcnt options.
# NOTE(review): the part of the ARCH table visible here only ever assigns
# arch = ppc64 for the PowerPC 64-bit variants, so the "ppc64-altivec" and
# "ppc64-vsx" entries look unreachable -- confirm against the full table.
ifeq ($(popcnt),yes)
    ifeq ($(arch),$(filter $(arch),ppc64 ppc64-altivec ppc64-vsx armv7 armv8 arm64))
        CXXFLAGS += -DUSE_POPCNT
    else
        CXXFLAGS += -msse3 -mpopcnt -DUSE_POPCNT
    endif
endif

### 3.6 SIMD architectures
# Pattern for the x86 stanzas: the USE_* define is added unconditionally, the
# matching -m option only for the gcc/clang/mingw/icx compiler families.
ifeq ($(avx2),yes)
    CXXFLAGS += -DUSE_AVX2
    ifeq ($(comp),$(filter $(comp),gcc clang mingw icx))
        CXXFLAGS += -mavx2 -mbmi
    endif
endif

ifeq ($(avxvnni),yes)
    CXXFLAGS += -DUSE_VNNI -DUSE_AVXVNNI
    ifeq ($(comp),$(filter $(comp),gcc clang mingw icx))
        CXXFLAGS += -mavxvnni
    endif
endif

ifeq ($(avx512),yes)
    CXXFLAGS += -DUSE_AVX512
    ifeq ($(comp),$(filter $(comp),gcc clang mingw icx))
        CXXFLAGS += -mavx512f -mavx512bw -mavx512dq -mavx512vl
    endif
endif

ifeq ($(vnni512),yes)
    CXXFLAGS += -DUSE_VNNI
    ifeq ($(comp),$(filter $(comp),gcc clang mingw icx))
        CXXFLAGS += -mavx512f -mavx512bw -mavx512vnni -mavx512dq -mavx512vl
    endif
endif

ifeq ($(avx512icl),yes)
    CXXFLAGS += -DUSE_AVX512 -DUSE_VNNI -DUSE_AVX512ICL
    ifeq ($(comp),$(filter $(comp),gcc clang mingw icx))
        CXXFLAGS += -mavx512f -mavx512cd -mavx512vl -mavx512dq -mavx512bw -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx512vpopcntdq -mavx512bitalg -mavx512vnni -mvpclmulqdq -mgfni -mvaes
    endif
endif

ifeq ($(sse41),yes)
    CXXFLAGS += -DUSE_SSE41
    ifeq ($(comp),$(filter $(comp),gcc clang mingw icx))
        CXXFLAGS += -msse4.1
    endif
endif

ifeq ($(ssse3),yes)
    CXXFLAGS += -DUSE_SSSE3
    ifeq ($(comp),$(filter $(comp),gcc clang mingw icx))
        CXXFLAGS += -mssse3
    endif
endif

ifeq ($(sse2),yes)
    CXXFLAGS += -DUSE_SSE2
    ifeq ($(comp),$(filter $(comp),gcc clang mingw icx))
        CXXFLAGS += -msse2
    endif
endif

# mmx only adds a compiler option, no USE_* define.
ifeq ($(mmx),yes)
    ifeq ($(comp),$(filter $(comp),gcc clang mingw icx))
        CXXFLAGS += -mmmx
    endif
endif

ifeq ($(altivec),yes)
    CXXFLAGS += -maltivec
    ifeq ($(COMP),gcc)
        CXXFLAGS += -mabi=altivec
    endif
endif

# With gcc, the vsx build also defines USE_SSE2 (together with
# NO_WARN_X86_INTRINSICS).
ifeq ($(vsx),yes)
    CXXFLAGS += -mvsx
    ifeq ($(COMP),gcc)
        CXXFLAGS += -DNO_WARN_X86_INTRINSICS -DUSE_SSE2
    endif
endif

# USE_NEON carries the ARM version as its value. -mfpu=neon is only added on
# Linux, outside the NDK, and for arch families other than armv8.
ifeq ($(neon),yes)
    CXXFLAGS += -DUSE_NEON=$(arm_version)
    ifeq ($(KERNEL),Linux)
    ifneq ($(COMP),ndk)
    ifneq ($(arch),armv8)
        CXXFLAGS += -mfpu=neon
    endif
    endif
    endif
endif

ifeq ($(dotprod),yes)
    CXXFLAGS += -march=armv8.2-a+dotprod -DUSE_NEON_DOTPROD
endif

ifeq ($(lasx),yes)
    ifeq ($(comp),$(filter $(comp),gcc clang mingw icx))
        CXXFLAGS += -mlasx
    endif
endif

ifeq ($(lsx),yes)
    ifeq ($(comp),$(filter $(comp),gcc clang mingw icx))
        CXXFLAGS += -mlsx
    endif
endif

### 3.7 pext
ifeq ($(pext),yes)
    CXXFLAGS += -DUSE_PEXT
    ifeq ($(comp),$(filter $(comp),gcc clang mingw icx))
        CXXFLAGS += -mbmi2
    endif
endif

### 3.8.1 Try to include git info for versioning and avoid recompiles if nothing changes
BUILD_SHA_FILE := .build_sha.txt
BUILD_DATE_FILE := .build_date.txt

# GIT_SHA is the first 8 characters of the HEAD commit id; both git lookups
# discard their error output.
GIT_SHA := $(shell git rev-parse HEAD 2>/dev/null | cut -c 1-8 || true)
GIT_DATE := $(shell git show -s --date=format:%Y%m%d --format=%cd HEAD 2>/dev/null || true)
COMPILER_DATE := $(shell date +%Y%m%d 2>/dev/null)
# Prefer the commit date; use today's date when GIT_DATE is empty.
BUILD_DATE := $(if $(GIT_DATE),$(GIT_DATE),$(COMPILER_DATE))

# $(call cache_file_contents,FILE,VALUE): write VALUE to FILE only when FILE
# is missing or its current content differs from VALUE.
define cache_file_contents
$(shell \
    if [ ! -f "$(1)" ] || [ "$$(cat "$(1)" 2>/dev/null)" != "$(2)" ]; then \
        printf '%s\n' "$(2)" > "$(1)"; \
    fi)
endef

# Refresh the cache files unless every requested goal is one of the listed
# maintenance targets.
ifneq ($(filter $(MAKECMDGOALS),help strip install clean net objclean profileclean format config-sanity),$(MAKECMDGOALS))
    _ := $(call cache_file_contents,$(BUILD_SHA_FILE),$(GIT_SHA))
    _ := $(call cache_file_contents,$(BUILD_DATE_FILE),$(BUILD_DATE))
endif

### 3.8.2 Try to include architecture
ifneq ($(ARCH), )
    CXXFLAGS += -DARCH=$(ARCH)
endif

### 3.9 Link Time Optimization
### This is a mix of compile and link time options because the lto link phase
### needs access to the optimization flags.
# LTO is only configured for optimized, non-debug builds.
ifeq ($(optimize),yes)
ifeq ($(debug),no)
    # Outside Darwin, link with lld when it is installed (looked up as ld.lld
    # first, then lld) and the compiler is clang, or "gcc" that is really clang.
    ifneq ($(KERNEL),Darwin)
        LLD_BIN := $(shell command -v ld.lld 2>/dev/null)
        ifeq ($(LLD_BIN),)
            LLD_BIN := $(shell command -v lld 2>/dev/null)
        endif
        ifneq ($(LLD_BIN),)
            ifeq ($(comp),clang)
                LDFLAGS += -fuse-ld=lld
            else ifeq ($(comp),gcc)
                ifneq ($(gccisclang),)
                    LDFLAGS += -fuse-ld=lld
                endif
            endif
        endif
    endif

    # In every branch the complete CXXFLAGS are appended to LDFLAGS, so the
    # link step sees the same options as the compile step.
    ifeq ($(comp),$(filter $(comp),clang icx))
        CXXFLAGS += -flto=full
        ifeq ($(comp),icx)
            CXXFLAGS += -fwhole-program-vtables
        endif
        LDFLAGS += $(CXXFLAGS)

# GCC and CLANG use different methods for parallelizing LTO and CLANG pretends to be
# GCC on some systems.

    else ifeq ($(comp),gcc)
        ifeq ($(gccisclang),)
            CXXFLAGS += -flto -flto-partition=one
            LDFLAGS += $(CXXFLAGS) -flto=jobserver
        else
            CXXFLAGS += -flto=full
            LDFLAGS += $(CXXFLAGS)
        endif

# To use LTO and static linking on Windows,
# the tool chain requires gcc version 10.1 or later.
    else ifeq ($(comp),mingw)
        CXXFLAGS += -flto -flto-partition=one
        LDFLAGS += $(CXXFLAGS) -save-temps
    endif
endif
endif

### 3.10 Android 5 can only run position independent executables. Note that this
### breaks Android 4.0 and earlier.
ifeq ($(OS), Android)
    CXXFLAGS += -fPIE
    LDFLAGS += -fPIE -pie
endif

### 3.11 Inline settings
# Optimized clang builds pass -inline-threshold=500 to LLVM.
ifeq ($(optimize), yes)
    ifeq ($(comp), clang)
        CXXFLAGS += -Xclang -mllvm -Xclang -inline-threshold=500
    endif
endif

### ==========================================================================
### Section 4.
Public Targets
### ==========================================================================

# Print the usage text: targets, ARCH values, compilers and example command
# lines. When SUPPORTED_ARCH is not "true", a hint to pass a supported ARCH
# is printed as well.
help:
	@echo "" && \
	echo "To compile stockfish, type: " && \
	echo "" && \
	echo "make -j target [ARCH=arch] [COMP=compiler] [COMPCXX=cxx]" && \
	echo "" && \
	echo "Supported targets:" && \
	echo "" && \
	echo "help > Display architecture details" && \
	echo "profile-build > standard build with profile-guided optimization" && \
	echo "build > skip profile-guided optimization" && \
	echo "net > Download the default nnue nets" && \
	echo "strip > Strip executable" && \
	echo "install > Install executable" && \
	echo "clean > Clean up" && \
	echo "" && \
	echo "Supported archs:" && \
	echo "" && \
	echo "native > select the best architecture for the host processor (default)" && \
	echo "x86-64-avx512icl > x86 64-bit with minimum avx512 support of Intel Ice Lake or AMD Zen 4" && \
	echo "x86-64-vnni512 > x86 64-bit with vnni 512bit support" && \
	echo "x86-64-avx512 > x86 64-bit with avx512 support" && \
	echo "x86-64-avxvnni > x86 64-bit with vnni 256bit support" && \
	echo "x86-64-bmi2 > x86 64-bit with bmi2 support" && \
	echo "x86-64-avx2 > x86 64-bit with avx2 support" && \
	echo "x86-64-sse41-popcnt > x86 64-bit with sse41 and popcnt support" && \
	echo "x86-64-modern > deprecated, currently x86-64-sse41-popcnt" && \
	echo "x86-64-ssse3 > x86 64-bit with ssse3 support" && \
	echo "x86-64-sse3-popcnt > x86 64-bit with sse3 compile and popcnt support" && \
	echo "x86-64 > x86 64-bit generic (with sse2 support)" && \
	echo "x86-32-sse41-popcnt > x86 32-bit with sse41 and popcnt support" && \
	echo "x86-32-sse2 > x86 32-bit with sse2 support" && \
	echo "x86-32 > x86 32-bit generic (with mmx compile support)" && \
	echo "ppc-64 > PPC 64-bit" && \
	echo "ppc-64-altivec > PPC 64-bit with altivec support" && \
	echo "ppc-64-vsx > PPC 64-bit with vsx support" && \
	echo "ppc-32 > PPC 32-bit" && \
	echo "armv7 > ARMv7 32-bit" && \
	echo "armv7-neon > ARMv7 32-bit with popcnt and neon" && \
	echo "armv8 > ARMv8 64-bit with popcnt and neon" && \
	echo "armv8-dotprod > ARMv8 64-bit with popcnt, neon and dot product support" && \
	echo "e2k > Elbrus 2000" && \
	echo "apple-silicon > Apple silicon ARM64" && \
	echo "general-64 > unspecified 64-bit" && \
	echo "general-32 > unspecified 32-bit" && \
	echo "riscv64 > RISC-V 64-bit" && \
	echo "loongarch64 > LoongArch 64-bit" && \
	echo "loongarch64-lsx > LoongArch 64-bit with SIMD eXtension" && \
	echo "loongarch64-lasx > LoongArch 64-bit with Advanced SIMD eXtension" && \
	echo "" && \
	echo "Supported compilers:" && \
	echo "" && \
	echo "gcc > GNU compiler (default)" && \
	echo "mingw > GNU compiler with MinGW under Windows" && \
	echo "clang > LLVM Clang compiler" && \
	echo "icx > Intel oneAPI DPC++/C++ Compiler" && \
	echo "ndk > Google NDK to cross-compile for Android" && \
	echo "" && \
	echo "Simple examples. If you don't know what to do, you likely want to run one of: " && \
	echo "" && \
	echo "make -j profile-build ARCH=x86-64-avx2 # typically a fast compile for common systems " && \
	echo "make -j profile-build ARCH=x86-64-sse41-popcnt # A more portable compile for 64-bit systems " && \
	echo "make -j profile-build ARCH=x86-64 # A portable compile for 64-bit systems " && \
	echo "" && \
	echo "Advanced examples, for experienced users: " && \
	echo "" && \
	echo "make -j profile-build ARCH=x86-64-avxvnni" && \
	echo "make -j profile-build ARCH=x86-64-avxvnni COMP=gcc COMPCXX=g++-12.0" && \
	echo "make -j build ARCH=x86-64-ssse3 COMP=clang" && \
	echo ""
ifneq ($(SUPPORTED_ARCH), true)
	@echo "Specify a supported architecture with the ARCH option for more details"
	@echo ""
endif

# Targets that do not name files. Each target is listed once (the original
# list named `analyze` twice).
.PHONY: help analyze build profile-build strip install clean net \
	objclean profileclean config-sanity \
	icx-profile-use icx-profile-make \
	gcc-profile-use gcc-profile-make \
	clang-profile-use clang-profile-make FORCE \
	format

# Rebuild every object from scratch; -k keeps going after a failed object so
# that all of them are attempted.
analyze: net config-sanity objclean
	$(MAKE) -k ARCH=$(ARCH) COMP=$(COMP) $(OBJS)

# Plain build, no profile-guided optimization.
build: net config-sanity
	$(MAKE) ARCH=$(ARCH) COMP=$(COMP) all

# Profile-guided build in four steps: instrumented build, benchmark run to
# collect profile data, optimized rebuild, removal of the profile data.
# $(profile_make) / $(profile_use) name the compiler-specific private targets.
profile-build: net config-sanity objclean profileclean
	@echo ""
	@echo "Step 1/4. Building instrumented executable ..."
	$(MAKE) ARCH=$(ARCH) COMP=$(COMP) $(profile_make)
	@echo ""
	@echo "Step 2/4. Running benchmark for pgo-build ..."
	$(PGOBENCH) > PGOBENCH.out 2>&1
	tail -n 4 PGOBENCH.out
	@echo ""
	@echo "Step 3/4. Building optimized executable ..."
	$(MAKE) ARCH=$(ARCH) COMP=$(COMP) objclean
	$(MAKE) ARCH=$(ARCH) COMP=$(COMP) $(profile_use)
	@echo ""
	@echo "Step 4/4. Deleting profile data ..."
	$(MAKE) ARCH=$(ARCH) COMP=$(COMP) profileclean

strip:
	$(STRIP) $(EXE)

# The leading "-" lets make continue when mkdir or cp fails.
install:
	-mkdir -p -m 755 $(BINDIR)
	-cp $(EXE) $(BINDIR)
	$(STRIP) $(BINDIR)/$(EXE)

# clean all
clean: objclean profileclean
	@rm -f .depend *~ core

# clean binaries and objects
objclean:
	@rm -f stockfish stockfish.exe *.o ./syzygy/*.o ./nnue/*.o ./nnue/features/*.o $(BUILD_SHA_FILE) $(BUILD_DATE_FILE)

# clean auxiliary profiling files
profileclean:
	@rm -rf profdir
	@rm -f bench.txt *.gcda *.gcno ./syzygy/*.gcda ./nnue/*.gcda ./nnue/features/*.gcda *.s PGOBENCH.out
	@rm -f stockfish.profdata *.profraw
	@rm -f stockfish.*args*
	@rm -f stockfish.*lt*
	@rm -f stockfish.res
	@rm -f ./-lstdc++.res

# evaluation network (nnue)
net:
	@$(SHELL) ../scripts/net.sh

# Reformat sources and headers in place using the repository's style file.
format:
	$(CLANG-FORMAT) -i $(SRCS) $(HEADERS) -style=file

### ==========================================================================
### Section 5.
Private Targets
### ==========================================================================

all: $(EXE) .depend

# Print the resolved configuration and flags, then validate it. The recipe is
# one "&&" chain, so the first failing `test` makes the target (and every
# build that depends on it) fail.
# NOTE(review): avxvnni and dotprod are printed below but have no matching
# yes/no test in the validation chain -- confirm whether that is intended.
# NOTE(review): the last test accepts the comp values
# "armv7a-linux-androideabi16-clang" and "aarch64-linux-android21-clang";
# no assignment in the visible part of this Makefile produces them (the ndk
# branch sets comp=clang) -- verify whether they are still needed.
config-sanity: net
	@echo ""
	@echo "Config:" && \
	echo "debug: '$(debug)'" && \
	echo "sanitize: '$(sanitize)'" && \
	echo "optimize: '$(optimize)'" && \
	echo "arch: '$(arch)'" && \
	echo "bits: '$(bits)'" && \
	echo "kernel: '$(KERNEL)'" && \
	echo "os: '$(OS)'" && \
	echo "prefetch: '$(prefetch)'" && \
	echo "popcnt: '$(popcnt)'" && \
	echo "pext: '$(pext)'" && \
	echo "sse: '$(sse)'" && \
	echo "mmx: '$(mmx)'" && \
	echo "sse2: '$(sse2)'" && \
	echo "ssse3: '$(ssse3)'" && \
	echo "sse41: '$(sse41)'" && \
	echo "avx2: '$(avx2)'" && \
	echo "avxvnni: '$(avxvnni)'" && \
	echo "avx512: '$(avx512)'" && \
	echo "vnni512: '$(vnni512)'" && \
	echo "avx512icl: '$(avx512icl)'" && \
	echo "altivec: '$(altivec)'" && \
	echo "vsx: '$(vsx)'" && \
	echo "neon: '$(neon)'" && \
	echo "dotprod: '$(dotprod)'" && \
	echo "arm_version: '$(arm_version)'" && \
	echo "lsx: '$(lsx)'" && \
	echo "lasx: '$(lasx)'" && \
	echo "target_windows: '$(target_windows)'" && \
	echo "" && \
	echo "Flags:" && \
	echo "CXX: $(CXX)" && \
	echo "CXXFLAGS: $(CXXFLAGS)" && \
	echo "LDFLAGS: $(LDFLAGS)" && \
	echo "" && \
	echo "Testing config sanity. If this fails, try 'make help' ..." && \
	echo "" && \
	(test "$(debug)" = "yes" || test "$(debug)" = "no") && \
	(test "$(optimize)" = "yes" || test "$(optimize)" = "no") && \
	(test "$(SUPPORTED_ARCH)" = "true") && \
	(test "$(arch)" = "any" || test "$(arch)" = "x86_64" || test "$(arch)" = "i386" || \
	 test "$(arch)" = "ppc64" || test "$(arch)" = "ppc" || test "$(arch)" = "e2k" || \
	 test "$(arch)" = "armv7" || test "$(arch)" = "armv8" || test "$(arch)" = "arm64" || \
	 test "$(arch)" = "riscv64" || test "$(arch)" = "loongarch64") && \
	(test "$(bits)" = "32" || test "$(bits)" = "64") && \
	(test "$(prefetch)" = "yes" || test "$(prefetch)" = "no") && \
	(test "$(popcnt)" = "yes" || test "$(popcnt)" = "no") && \
	(test "$(pext)" = "yes" || test "$(pext)" = "no") && \
	(test "$(sse)" = "yes" || test "$(sse)" = "no") && \
	(test "$(mmx)" = "yes" || test "$(mmx)" = "no") && \
	(test "$(sse2)" = "yes" || test "$(sse2)" = "no") && \
	(test "$(ssse3)" = "yes" || test "$(ssse3)" = "no") && \
	(test "$(sse41)" = "yes" || test "$(sse41)" = "no") && \
	(test "$(avx2)" = "yes" || test "$(avx2)" = "no") && \
	(test "$(avx512)" = "yes" || test "$(avx512)" = "no") && \
	(test "$(vnni512)" = "yes" || test "$(vnni512)" = "no") && \
	(test "$(avx512icl)" = "yes" || test "$(avx512icl)" = "no") && \
	(test "$(altivec)" = "yes" || test "$(altivec)" = "no") && \
	(test "$(vsx)" = "yes" || test "$(vsx)" = "no") && \
	(test "$(neon)" = "yes" || test "$(neon)" = "no") && \
	(test "$(lsx)" = "yes" || test "$(lsx)" = "no") && \
	(test "$(lasx)" = "yes" || test "$(lasx)" = "no") && \
	(test "$(comp)" = "gcc" || test "$(comp)" = "icx" || test "$(comp)" = "mingw" || \
	 test "$(comp)" = "clang" || test "$(comp)" = "armv7a-linux-androideabi16-clang" || \
	 test "$(comp)" = "aarch64-linux-android21-clang")

# Link step. The "+" prefix marks the recipe line as one that make should
# treat like a recursive-make line (see the GNU make manual on "+").
$(EXE): $(OBJS)
	+$(CXX) -o $@ $(OBJS) $(LDFLAGS)

# Generic compile rule; $(strip ...) collapses the whitespace left by empty
# flag variables.
%.o: %.cpp
	$(strip $(CXX) $(CPPFLAGS) $(CXXFLAGS)) -c -o $@ $<

# Cache git metadata when available, otherwise cache the compiler date.
# misc.o has its own rule: it lists the two cache files as prerequisites, and
# -DGIT_SHA / -DGIT_DATE are only appended when the respective value is
# non-empty. The command line is assembled in the shell's positional
# parameters, printed with printf (the recipe itself is silenced with "@"),
# and then executed.
misc.o: misc.cpp $(BUILD_SHA_FILE) $(BUILD_DATE_FILE)
	@sha="$$(cat $(BUILD_SHA_FILE))"; \
	set -- $(CXX) $(CPPFLAGS) $(CXXFLAGS); \
	test -n "$$sha" && set -- "$$@" -DGIT_SHA=$$sha; \
	test -n "$(GIT_DATE)" && set -- "$$@" -DGIT_DATE=$(GIT_DATE); \
	set -- "$$@" -c $< -o $@; \
	printf '%s ' "$$@"; \
	printf '\n'; \
	"$$@"

# Compiler-specific halves of a profile-guided build. Each one re-invokes make
# on `all` with extra compile/link flags supplied through EXTRACXXFLAGS and
# EXTRALDFLAGS.
clang-profile-make:
	$(MAKE) ARCH=$(ARCH) COMP=$(COMP) \
	EXTRACXXFLAGS='-fprofile-generate ' \
	EXTRALDFLAGS=' -fprofile-generate' \
	all

# Merges the raw profiles into stockfish.profdata before rebuilding.
clang-profile-use:
	$(XCRUN) $(LLVM_PROFDATA) merge -output=stockfish.profdata *.profraw
	$(MAKE) ARCH=$(ARCH) COMP=$(COMP) \
	EXTRACXXFLAGS='-fprofile-use=stockfish.profdata' \
	EXTRALDFLAGS='-fprofile-use ' \
	all

# gcc writes its profile data below ./profdir.
gcc-profile-make:
	@mkdir -p profdir
	$(MAKE) ARCH=$(ARCH) COMP=$(COMP) \
	EXTRACXXFLAGS='-fprofile-generate=profdir' \
	EXTRACXXFLAGS+=$(EXTRAPROFILEFLAGS) \
	EXTRALDFLAGS='-lgcov' \
	all

gcc-profile-use:
	$(MAKE) ARCH=$(ARCH) COMP=$(COMP) \
	EXTRACXXFLAGS='-fprofile-use=profdir -fno-peel-loops -fno-tracer' \
	EXTRACXXFLAGS+=$(EXTRAPROFILEFLAGS) \
	EXTRALDFLAGS='-lgcov' \
	all

icx-profile-make:
	$(MAKE) ARCH=$(ARCH) COMP=$(COMP) \
	EXTRACXXFLAGS='-fprofile-instr-generate ' \
	EXTRALDFLAGS=' -fprofile-instr-generate' \
	all

# NOTE(review): the compile step uses -fprofile-instr-use=... while the link
# step passes a bare -fprofile-use -- confirm this asymmetry is intended.
icx-profile-use:
	$(XCRUN) $(LLVM_PROFDATA) merge -output=stockfish.profdata *.profraw
	$(MAKE) ARCH=$(ARCH) COMP=$(COMP) \
	EXTRACXXFLAGS='-fprofile-instr-use=stockfish.profdata' \
	EXTRALDFLAGS='-fprofile-use ' \
	all

# Regenerate the header dependency list; failures are ignored ("-") and the
# compiler's error output is discarded.
.depend: $(SRCS)
	-@$(CXX) $(DEPENDFLAGS) -MM $(SRCS) > $@ 2> /dev/null

# Dependency information is only pulled in when none of the requested goals is
# one of the listed maintenance targets.
ifeq (, $(filter $(MAKECMDGOALS), help strip install clean net objclean profileclean format config-sanity))
-include .depend
endif

================================================
FILE: src/benchmark.cpp
================================================
/*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
  Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free
Software Foundation, either version 3 of the License, or (at your option) any later version. Stockfish is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #include "benchmark.h" #include "numa.h" #include #include #include #include namespace { // clang-format off const std::vector Defaults = { "setoption name UCI_Chess960 value false", "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1", "r3k2r/p1ppqpb1/bn2pnp1/3PN3/1p2P3/2N2Q1p/PPPBBPPP/R3K2R w KQkq - 0 10", "8/2p5/3p4/KP5r/1R3p1k/8/4P1P1/8 w - - 0 11", "4rrk1/pp1n3p/3q2pQ/2p1pb2/2PP4/2P3N1/P2B2PP/4RRK1 b - - 7 19", "rq3rk1/ppp2ppp/1bnpb3/3N2B1/3NP3/7P/PPPQ1PP1/2KR3R w - - 7 14 moves d4e6", "r1bq1r1k/1pp1n1pp/1p1p4/4p2Q/4Pp2/1BNP4/PPP2PPP/3R1RK1 w - - 2 14 moves g2g4", "r3r1k1/2p2ppp/p1p1bn2/8/1q2P3/2NPQN2/PPP3PP/R4RK1 b - - 2 15", "r1bbk1nr/pp3p1p/2n5/1N4p1/2Np1B2/8/PPP2PPP/2KR1B1R w kq - 0 13", "r1bq1rk1/ppp1nppp/4n3/3p3Q/3P4/1BP1B3/PP1N2PP/R4RK1 w - - 1 16", "4r1k1/r1q2ppp/ppp2n2/4P3/5Rb1/1N1BQ3/PPP3PP/R5K1 w - - 1 17", "2rqkb1r/ppp2p2/2npb1p1/1N1Nn2p/2P1PP2/8/PP2B1PP/R1BQK2R b KQ - 0 11", "r1bq1r1k/b1p1npp1/p2p3p/1p6/3PP3/1B2NN2/PP3PPP/R2Q1RK1 w - - 1 16", "3r1rk1/p5pp/bpp1pp2/8/q1PP1P2/b3P3/P2NQRPP/1R2B1K1 b - - 6 22", "r1q2rk1/2p1bppp/2Pp4/p6b/Q1PNp3/4B3/PP1R1PPP/2K4R w - - 2 18", "4k2r/1pb2ppp/1p2p3/1R1p4/3P4/2r1PN2/P4PPP/1R4K1 b - - 3 22", "3q2k1/pb3p1p/4pbp1/2r5/PpN2N2/1P2P2P/5PP1/Q2R2K1 b - - 4 26", "6k1/6p1/6Pp/ppp5/3pn2P/1P3K2/1PP2P2/3N4 b - - 0 1", "3b4/5kp1/1p1p1p1p/pP1PpP1P/P1P1P3/3KN3/8/8 w - - 0 1", "2K5/p7/7P/5pR1/8/5k2/r7/8 w - - 0 1 moves g5g6 f3e3 g6g5 e3f3", "8/6pk/1p6/8/PP3p1p/5P2/4KP1q/3Q4 w - - 0 1", "7k/3p2pp/4q3/8/4Q3/5Kp1/P6b/8 w - - 0 1", "8/2p5/8/2kPKp1p/2p4P/2P5/3P4/8 w - - 0 1", 
"8/1p3pp1/7p/5P1P/2k3P1/8/2K2P2/8 w - - 0 1", "8/pp2r1k1/2p1p3/3pP2p/1P1P1P1P/P5KR/8/8 w - - 0 1", "8/3p4/p1bk3p/Pp6/1Kp1PpPp/2P2P1P/2P5/5B2 b - - 0 1", "5k2/7R/4P2p/5K2/p1r2P1p/8/8/8 b - - 0 1", "6k1/6p1/P6p/r1N5/5p2/7P/1b3PP1/4R1K1 w - - 0 1", "1r3k2/4q3/2Pp3b/3Bp3/2Q2p2/1p1P2P1/1P2KP2/3N4 w - - 0 1", "6k1/4pp1p/3p2p1/P1pPb3/R7/1r2P1PP/3B1P2/6K1 w - - 0 1", "8/3p3B/5p2/5P2/p7/PP5b/k7/6K1 w - - 0 1", "5rk1/q6p/2p3bR/1pPp1rP1/1P1Pp3/P3B1Q1/1K3P2/R7 w - - 93 90", "4rrk1/1p1nq3/p7/2p1P1pp/3P2bp/3Q1Bn1/PPPB4/1K2R1NR w - - 40 21", "r3k2r/3nnpbp/q2pp1p1/p7/Pp1PPPP1/4BNN1/1P5P/R2Q1RK1 w kq - 0 16", "3Qb1k1/1r2ppb1/pN1n2q1/Pp1Pp1Pr/4P2p/4BP2/4B1R1/1R5K b - - 11 40", "4k3/3q1r2/1N2r1b1/3ppN2/2nPP3/1B1R2n1/2R1Q3/3K4 w - - 5 1", "1r6/1P4bk/3qr1p1/N6p/3pp2P/6R1/3Q1PP1/1R4K1 w - - 1 42", // Positions with high numbers of changed threats "k7/2n1n3/1nbNbn2/2NbRBn1/1nbRQR2/2NBRBN1/3N1N2/7K w - - 0 1", "K7/8/8/BNQNQNB1/N5N1/R1Q1q2r/n5n1/bnqnqnbk w - - 0 1", // 5-man positions "8/8/8/8/5kp1/P7/8/1K1N4 w - - 0 1", // Kc2 - mate "8/8/8/5N2/8/p7/8/2NK3k w - - 0 1", // Na2 - mate "8/3k4/8/8/8/4B3/4KB2/2B5 w - - 0 1", // draw // 6-man positions "8/8/1P6/5pr1/8/4R3/7k/2K5 w - - 0 1", // Re5 - mate "8/2p4P/8/kr6/6R1/8/8/1K6 w - - 0 1", // Ka2 - mate "8/8/3P3k/8/1p6/8/1P6/1K3n2 b - - 0 1", // Nd2 - draw // 7-man positions "8/R7/2q5/8/6k1/8/1P5p/K6R w - - 0 124", // Draw // Mate and stalemate positions "6k1/3b3r/1p1p4/p1n2p2/1PPNpP1q/P3Q1p1/1R1RB1P1/5K2 b - - 0 1", "r2r1n2/pp2bk2/2p1p2p/3q4/3PN1QP/2P3R1/P4PP1/5RK1 w - - 0 1", "8/8/8/8/8/6k1/6p1/6K1 w - -", "7k/7P/6K1/8/3B4/8/8/8 b - -", // Chess 960 "setoption name UCI_Chess960 value true", "bbqnnrkr/pppppppp/8/8/8/8/PPPPPPPP/BBQNNRKR w HFhf - 0 1 moves g2g3 d7d5 d2d4 c8h3 c1g5 e8d6 g5e7 f7f6", "nqbnrkrb/pppppppp/8/8/8/8/PPPPPPPP/NQBNRKRB w KQkq - 0 1", "setoption name UCI_Chess960 value false" }; // clang-format on // clang-format off // human-randomly picked 5 games with <60 moves from // 
https://tests.stockfishchess.org/tests/view/665c71f9fd45fb0f907c21e0 // only moves for one side const std::vector> BenchmarkPositions = { { "rnbq1k1r/ppp1bppp/4pn2/8/2B5/2NP1N2/PPP2PPP/R1BQR1K1 b - - 2 8", "rnbq1k1r/pp2bppp/4pn2/2p5/2B2B2/2NP1N2/PPP2PPP/R2QR1K1 b - - 1 9", "r1bq1k1r/pp2bppp/2n1pn2/2p5/2B1NB2/3P1N2/PPP2PPP/R2QR1K1 b - - 3 10", "r1bq1k1r/pp2bppp/2n1p3/2p5/2B1PB2/5N2/PPP2PPP/R2QR1K1 b - - 0 11", "r1b2k1r/pp2bppp/2n1p3/2p5/2B1PB2/5N2/PPP2PPP/3RR1K1 b - - 0 12", "r1b1k2r/pp2bppp/2n1p3/2p5/2B1PB2/2P2N2/PP3PPP/3RR1K1 b - - 0 13", "r1b1k2r/1p2bppp/p1n1p3/2p5/4PB2/2P2N2/PP2BPPP/3RR1K1 b - - 1 14", "r1b1k2r/4bppp/p1n1p3/1pp5/P3PB2/2P2N2/1P2BPPP/3RR1K1 b - - 0 15", "r1b1k2r/4bppp/p1n1p3/1P6/2p1PB2/2P2N2/1P2BPPP/3RR1K1 b - - 0 16", "r1b1k2r/4bppp/2n1p3/1p6/2p1PB2/1PP2N2/4BPPP/3RR1K1 b - - 0 17", "r3k2r/3bbppp/2n1p3/1p6/2P1PB2/2P2N2/4BPPP/3RR1K1 b - - 0 18", "r3k2r/3bbppp/2n1p3/8/1pP1P3/2P2N2/3BBPPP/3RR1K1 b - - 1 19", "1r2k2r/3bbppp/2n1p3/8/1pPNP3/2P5/3BBPPP/3RR1K1 b - - 3 20", "1r2k2r/3bbppp/2n1p3/8/2PNP3/2B5/4BPPP/3RR1K1 b - - 0 21", "1r2k2r/3bb1pp/2n1pp2/1N6/2P1P3/2B5/4BPPP/3RR1K1 b - - 1 22", "1r2k2r/3b2pp/2n1pp2/1N6/1BP1P3/8/4BPPP/3RR1K1 b - - 0 23", "1r2k2r/3b2pp/4pp2/1N6/1nP1P3/8/3RBPPP/4R1K1 b - - 1 24", "1r5r/3bk1pp/4pp2/1N6/1nP1PP2/8/3RB1PP/4R1K1 b - - 0 25", "1r5r/3bk1pp/2n1pp2/1N6/2P1PP2/8/3RBKPP/4R3 b - - 2 26", "1r5r/3bk1pp/2n2p2/1N2p3/2P1PP2/6P1/3RBK1P/4R3 b - - 0 27", "1r1r4/3bk1pp/2n2p2/1N2p3/2P1PP2/6P1/3RBK1P/R7 b - - 2 28", "1r1r4/N3k1pp/2n1bp2/4p3/2P1PP2/6P1/3RBK1P/R7 b - - 4 29", "1r1r4/3bk1pp/2N2p2/4p3/2P1PP2/6P1/3RBK1P/R7 b - - 0 30", "1r1R4/4k1pp/2b2p2/4p3/2P1PP2/6P1/4BK1P/R7 b - - 0 31", "3r4/4k1pp/2b2p2/4P3/2P1P3/6P1/4BK1P/R7 b - - 0 32", "3r4/R3k1pp/2b5/4p3/2P1P3/6P1/4BK1P/8 b - - 1 33", "8/3rk1pp/2b5/R3p3/2P1P3/6P1/4BK1P/8 b - - 3 34", "8/3r2pp/2bk4/R1P1p3/4P3/6P1/4BK1P/8 b - - 0 35", "8/2kr2pp/2b5/R1P1p3/4P3/4K1P1/4B2P/8 b - - 2 36", "1k6/3r2pp/2b5/RBP1p3/4P3/4K1P1/7P/8 b - - 4 37", "8/1k1r2pp/2b5/R1P1p3/4P3/3BK1P1/7P/8 b - - 6 
38", "1k6/3r2pp/2b5/2P1p3/4P3/3BK1P1/7P/R7 b - - 8 39", "1k6/r5pp/2b5/2P1p3/4P3/3BK1P1/7P/5R2 b - - 10 40", "1k3R2/6pp/2b5/2P1p3/4P3/r2BK1P1/7P/8 b - - 12 41", "5R2/2k3pp/2b5/2P1p3/4P3/r2B2P1/3K3P/8 b - - 14 42", "5R2/2k3pp/2b5/2P1p3/4P3/3BK1P1/r6P/8 b - - 16 43", "5R2/2k3pp/2b5/2P1p3/4P3/r2B2P1/4K2P/8 b - - 18 44", "5R2/2k3pp/2b5/2P1p3/4P3/3B1KP1/r6P/8 b - - 20 45", "8/2k2Rpp/2b5/2P1p3/4P3/r2B1KP1/7P/8 b - - 22 46", "3k4/5Rpp/2b5/2P1p3/4P3/r2B2P1/4K2P/8 b - - 24 47", "3k4/5Rpp/2b5/2P1p3/4P3/3B1KP1/r6P/8 b - - 26 48", "3k4/5Rpp/2b5/2P1p3/4P3/r2B2P1/4K2P/8 b - - 28 49", "3k4/5Rpp/2b5/2P1p3/4P3/3BK1P1/r6P/8 b - - 30 50", "3k4/5Rpp/2b5/2P1p3/4P3/r2B2P1/3K3P/8 b - - 32 51", "3k4/5Rpp/2b5/2P1p3/4P3/2KB2P1/r6P/8 b - - 34 52", "3k4/5Rpp/2b5/2P1p3/4P3/r2B2P1/2K4P/8 b - - 36 53", "3k4/5Rpp/2b5/2P1p3/4P3/1K1B2P1/r6P/8 b - - 38 54", "3k4/6Rp/2b5/2P1p3/4P3/1K1B2P1/7r/8 b - - 0 55", "3k4/8/2b3Rp/2P1p3/4P3/1K1B2P1/7r/8 b - - 1 56", "8/2k3R1/2b4p/2P1p3/4P3/1K1B2P1/7r/8 b - - 3 57", "3k4/8/2b3Rp/2P1p3/4P3/1K1B2P1/7r/8 b - - 5 58", "8/2k5/2b3Rp/2P1p3/1K2P3/3B2P1/7r/8 b - - 7 59", "8/2k5/2b3Rp/2P1p3/4P3/2KB2P1/3r4/8 b - - 9 60", "8/2k5/2b3Rp/2P1p3/1K2P3/3B2P1/6r1/8 b - - 11 61", "8/2k5/2b3Rp/2P1p3/4P3/2KB2P1/3r4/8 b - - 13 62", "8/2k5/2b3Rp/2P1p3/2K1P3/3B2P1/6r1/8 b - - 15 63", "4b3/2k3R1/7p/2P1p3/2K1P3/3B2P1/6r1/8 b - - 17 64", }, { "r1bqkbnr/npp1pppp/p7/3P4/4pB2/2N5/PPP2PPP/R2QKBNR w KQkq - 1 6", "r1bqkb1r/npp1pppp/p4n2/3P4/4pB2/2N5/PPP1QPPP/R3KBNR w KQkq - 3 7", "r2qkb1r/npp1pppp/p4n2/3P1b2/4pB2/2N5/PPP1QPPP/2KR1BNR w kq - 5 8", "r2qkb1r/1pp1pppp/p4n2/1n1P1b2/4pB2/2N4P/PPP1QPP1/2KR1BNR w kq - 1 9", "r2qkb1r/1pp1pppp/5n2/1p1P1b2/4pB2/7P/PPP1QPP1/2KR1BNR w kq - 0 10", "r2qkb1r/1ppbpppp/5n2/1Q1P4/4pB2/7P/PPP2PP1/2KR1BNR w kq - 1 11", "3qkb1r/1Qpbpppp/5n2/3P4/4pB2/7P/rPP2PP1/2KR1BNR w k - 0 12", "q3kb1r/1Qpbpppp/5n2/3P4/4pB2/7P/rPP2PP1/1K1R1BNR w k - 2 13", "r3kb1r/2pbpppp/5n2/3P4/4pB2/7P/1PP2PP1/1K1R1BNR w k - 0 14", "r3kb1r/2Bb1ppp/4pn2/3P4/4p3/7P/1PP2PP1/1K1R1BNR w k - 0 15", 
"r3kb1r/2Bb2pp/4pn2/8/4p3/7P/1PP2PP1/1K1R1BNR w k - 0 16", "r3k2r/2Bb2pp/4pn2/2b5/4p3/7P/1PP1NPP1/1K1R1B1R w k - 2 17", "r6r/2Bbk1pp/4pn2/2b5/3Np3/7P/1PP2PP1/1K1R1B1R w - - 4 18", "r6r/b2bk1pp/4pn2/4B3/3Np3/7P/1PP2PP1/1K1R1B1R w - - 6 19", "r1r5/b2bk1pp/4pn2/4B3/2BNp3/7P/1PP2PP1/1K1R3R w - - 8 20", "r7/b2bk1pp/4pn2/2r1B3/2BNp3/1P5P/2P2PP1/1K1R3R w - - 1 21", "rb6/3bk1pp/4pn2/2r1B3/2BNpP2/1P5P/2P3P1/1K1R3R w - - 1 22", "1r6/3bk1pp/4pn2/2r5/2BNpP2/1P5P/2P3P1/1K1R3R w - - 0 23", "1r6/3bk1p1/4pn1p/2r5/2BNpP2/1P5P/2P3P1/2KR3R w - - 0 24", "8/3bk1p1/1r2pn1p/2r5/2BNpP1P/1P6/2P3P1/2KR3R w - - 1 25", "8/3bk3/1r2pnpp/2r5/2BNpP1P/1P6/2P3P1/2K1R2R w - - 0 26", "2b5/4k3/1r2pnpp/2r5/2BNpP1P/1P4P1/2P5/2K1R2R w - - 1 27", "8/1b2k3/1r2pnpp/2r5/2BNpP1P/1P4P1/2P5/2K1R1R1 w - - 3 28", "8/1b1nk3/1r2p1pp/2r5/2BNpPPP/1P6/2P5/2K1R1R1 w - - 1 29", "8/1b2k3/1r2p1pp/2r1nP2/2BNp1PP/1P6/2P5/2K1R1R1 w - - 1 30", "8/1b2k3/1r2p1p1/2r1nPp1/2BNp2P/1P6/2P5/2K1R1R1 w - - 0 31", "8/1b2k3/1r2p1n1/2r3p1/2BNp2P/1P6/2P5/2K1R1R1 w - - 0 32", "8/1b2k3/1r2p1n1/6r1/2BNp2P/1P6/2P5/2K1R3 w - - 0 33", "8/1b2k3/1r2p3/4n1P1/2BNp3/1P6/2P5/2K1R3 w - - 1 34", "8/1b2k3/1r2p3/4n1P1/2BN4/1P2p3/2P5/2K4R w - - 0 35", "8/1b2k3/1r2p2R/6P1/2nN4/1P2p3/2P5/2K5 w - - 0 36", "8/1b2k3/3rp2R/6P1/2PN4/4p3/2P5/2K5 w - - 1 37", "8/4k3/3rp2R/6P1/2PN4/2P1p3/6b1/2K5 w - - 1 38", "8/4k3/r3p2R/2P3P1/3N4/2P1p3/6b1/2K5 w - - 1 39", "8/3k4/r3p2R/2P2NP1/8/2P1p3/6b1/2K5 w - - 3 40", "8/3k4/4p2R/2P3P1/8/2P1N3/6b1/r1K5 w - - 1 41", "8/3k4/4p2R/2P3P1/8/2P1N3/3K2b1/6r1 w - - 3 42", "8/3k4/4p2R/2P3P1/8/2PKNb2/8/6r1 w - - 5 43", "8/4k3/4p1R1/2P3P1/8/2PKNb2/8/6r1 w - - 7 44", "8/4k3/4p1R1/2P3P1/3K4/2P1N3/8/6rb w - - 9 45", "8/3k4/4p1R1/2P1K1P1/8/2P1N3/8/6rb w - - 11 46", "8/3k4/4p1R1/2P3P1/5K2/2P1N3/8/4r2b w - - 13 47", "8/3k4/2b1p2R/2P3P1/5K2/2P1N3/8/4r3 w - - 15 48", "8/3k4/2b1p3/2P3P1/5K2/2P1N2R/8/6r1 w - - 17 49", "2k5/7R/2b1p3/2P3P1/5K2/2P1N3/8/6r1 w - - 19 50", "2k5/7R/4p3/2P3P1/b1P2K2/4N3/8/6r1 w - - 1 51", "2k5/3bR3/4p3/2P3P1/2P2K2/4N3/8/6r1 
w - - 3 52", "3k4/3b2R1/4p3/2P3P1/2P2K2/4N3/8/6r1 w - - 5 53", "3kb3/6R1/4p1P1/2P5/2P2K2/4N3/8/6r1 w - - 1 54", "3kb3/6R1/4p1P1/2P5/2P2KN1/8/8/2r5 w - - 3 55", "3kb3/6R1/4p1P1/2P1N3/2P2K2/8/8/5r2 w - - 5 56", "3kb3/6R1/4p1P1/2P1N3/2P5/4K3/8/4r3 w - - 7 57", }, { "rnbq1rk1/ppp1npb1/4p1p1/3P3p/3PP3/2N2N2/PP2BPPP/R1BQ1RK1 b - - 0 8", "rnbq1rk1/ppp1npb1/6p1/3pP2p/3P4/2N2N2/PP2BPPP/R1BQ1RK1 b - - 0 9", "rn1q1rk1/ppp1npb1/6p1/3pP2p/3P2b1/2N2N2/PP2BPPP/R1BQR1K1 b - - 2 10", "r2q1rk1/ppp1npb1/2n3p1/3pP2p/3P2bN/2N5/PP2BPPP/R1BQR1K1 b - - 4 11", "r4rk1/pppqnpb1/2n3p1/3pP2p/3P2bN/2N4P/PP2BPP1/R1BQR1K1 b - - 0 12", "r4rk1/pppqnpb1/2n3p1/3pP2p/3P3N/7P/PP2NPP1/R1BQR1K1 b - - 0 13", "r4rk1/pppq1pb1/2n3p1/3pPN1p/3P4/7P/PP2NPP1/R1BQR1K1 b - - 0 14", "r4rk1/ppp2pb1/2n3p1/3pPq1p/3P1N2/7P/PP3PP1/R1BQR1K1 b - - 1 15", "r4rk1/pppq1pb1/2n3p1/3pP2p/P2P1N2/7P/1P3PP1/R1BQR1K1 b - - 0 16", "r2n1rk1/pppq1pb1/6p1/3pP2p/P2P1N2/R6P/1P3PP1/2BQR1K1 b - - 2 17", "r4rk1/pppq1pb1/4N1p1/3pP2p/P2P4/R6P/1P3PP1/2BQR1K1 b - - 0 18", "r4rk1/ppp2pb1/4q1p1/3pP1Bp/P2P4/R6P/1P3PP1/3QR1K1 b - - 1 19", "r3r1k1/ppp2pb1/4q1p1/3pP1Bp/P2P1P2/R6P/1P4P1/3QR1K1 b - - 0 20", "r3r1k1/ppp3b1/4qpp1/3pP2p/P2P1P1B/R6P/1P4P1/3QR1K1 b - - 1 21", "r3r1k1/ppp3b1/4q1p1/3pP2p/P4P1B/R6P/1P4P1/3QR1K1 b - - 0 22", "r4rk1/ppp3b1/4q1p1/3pP1Bp/P4P2/R6P/1P4P1/3QR1K1 b - - 2 23", "r4rk1/pp4b1/4q1p1/2ppP1Bp/P4P2/3R3P/1P4P1/3QR1K1 b - - 1 24", "r4rk1/pp4b1/4q1p1/2p1P1Bp/P2p1PP1/3R3P/1P6/3QR1K1 b - - 0 25", "r4rk1/pp4b1/4q1p1/2p1P1B1/P2p1PP1/3R4/1P6/3QR1K1 b - - 0 26", "r5k1/pp3rb1/4q1p1/2p1P1B1/P2p1PP1/6R1/1P6/3QR1K1 b - - 2 27", "5rk1/pp3rb1/4q1p1/2p1P1B1/P2pRPP1/6R1/1P6/3Q2K1 b - - 4 28", "5rk1/1p3rb1/p3q1p1/P1p1P1B1/3pRPP1/6R1/1P6/3Q2K1 b - - 0 29", "4r1k1/1p3rb1/p3q1p1/P1p1P1B1/3pRPP1/1P4R1/8/3Q2K1 b - - 0 30", "4r1k1/5rb1/pP2q1p1/2p1P1B1/3pRPP1/1P4R1/8/3Q2K1 b - - 0 31", "4r1k1/5rb1/pq4p1/2p1P1B1/3pRPP1/1P4R1/4Q3/6K1 b - - 1 32", "4r1k1/1r4b1/pq4p1/2p1P1B1/3pRPP1/1P4R1/2Q5/6K1 b - - 3 33", 
"4r1k1/1r4b1/1q4p1/p1p1P1B1/3p1PP1/1P4R1/2Q5/4R1K1 b - - 1 34", "4r1k1/3r2b1/1q4p1/p1p1P1B1/2Qp1PP1/1P4R1/8/4R1K1 b - - 3 35", "4r1k1/3r2b1/4q1p1/p1p1P1B1/2Qp1PP1/1P4R1/5K2/4R3 b - - 5 36", "4r1k1/3r2b1/6p1/p1p1P1B1/2Pp1PP1/6R1/5K2/4R3 b - - 0 37", "4r1k1/3r2b1/6p1/p1p1P1B1/2P2PP1/3p2R1/5K2/3R4 b - - 1 38", "5rk1/3r2b1/6p1/p1p1P1B1/2P2PP1/3p2R1/8/3RK3 b - - 3 39", "5rk1/6b1/6p1/p1p1P1B1/2Pr1PP1/3R4/8/3RK3 b - - 0 40", "5rk1/3R2b1/6p1/p1p1P1B1/2r2PP1/8/8/3RK3 b - - 1 41", "5rk1/3R2b1/6p1/p1p1P1B1/4rPP1/8/3K4/3R4 b - - 3 42", "1r4k1/3R2b1/6p1/p1p1P1B1/4rPP1/2K5/8/3R4 b - - 5 43", "1r4k1/3R2b1/6p1/p1p1P1B1/2K2PP1/4r3/8/3R4 b - - 7 44", "1r3bk1/8/3R2p1/p1p1P1B1/2K2PP1/4r3/8/3R4 b - - 9 45", "1r3bk1/8/6R1/2p1P1B1/p1K2PP1/4r3/8/3R4 b - - 0 46", "1r3b2/5k2/R7/2p1P1B1/p1K2PP1/4r3/8/3R4 b - - 2 47", "5b2/1r3k2/R7/2p1P1B1/p1K2PP1/4r3/8/7R b - - 4 48", "5b2/5k2/R7/2pKP1B1/pr3PP1/4r3/8/7R b - - 6 49", "5b2/5k2/R1K5/2p1P1B1/p2r1PP1/4r3/8/7R b - - 8 50", "8/R4kb1/2K5/2p1P1B1/p2r1PP1/4r3/8/7R b - - 10 51", "8/R5b1/2K3k1/2p1PPB1/p2r2P1/4r3/8/7R b - - 0 52", "8/6R1/2K5/2p1PPk1/p2r2P1/4r3/8/7R b - - 0 53", "8/6R1/2K5/2p1PP2/p2r1kP1/4r3/8/5R2 b - - 2 54", "8/6R1/2K2P2/2p1P3/p2r2P1/4r1k1/8/5R2 b - - 0 55", "8/5PR1/2K5/2p1P3/p2r2P1/4r3/6k1/5R2 b - - 0 56", }, { "rn1qkb1r/p1pbpppp/5n2/8/2pP4/2N5/1PQ1PPPP/R1B1KBNR w KQkq - 0 7", "r2qkb1r/p1pbpppp/2n2n2/8/2pP4/2N2N2/1PQ1PPPP/R1B1KB1R w KQkq - 2 8", "r2qkb1r/p1pbpppp/5n2/8/1npPP3/2N2N2/1PQ2PPP/R1B1KB1R w KQkq - 1 9", "r2qkb1r/p1pb1ppp/4pn2/8/1npPP3/2N2N2/1P3PPP/R1BQKB1R w KQkq - 0 10", "r2qk2r/p1pbbppp/4pn2/8/1nBPP3/2N2N2/1P3PPP/R1BQK2R w KQkq - 1 11", "r2q1rk1/p1pbbppp/4pn2/8/1nBPP3/2N2N2/1P3PPP/R1BQ1RK1 w - - 3 12", "r2q1rk1/2pbbppp/p3pn2/8/1nBPPB2/2N2N2/1P3PPP/R2Q1RK1 w - - 0 13", "r2q1rk1/2p1bppp/p3pn2/1b6/1nBPPB2/2N2N2/1P3PPP/R2QR1K1 w - - 2 14", "r2q1rk1/4bppp/p1p1pn2/1b6/1nBPPB2/1PN2N2/5PPP/R2QR1K1 w - - 0 15", "r4rk1/3qbppp/p1p1pn2/1b6/1nBPPB2/1PN2N2/3Q1PPP/R3R1K1 w - - 2 16", 
"r4rk1/1q2bppp/p1p1pn2/1b6/1nBPPB2/1PN2N1P/3Q1PP1/R3R1K1 w - - 1 17", "r3r1k1/1q2bppp/p1p1pn2/1b6/1nBPPB2/1PN2N1P/4QPP1/R3R1K1 w - - 3 18", "r3r1k1/1q1nbppp/p1p1p3/1b6/1nBPPB2/1PN2N1P/4QPP1/3RR1K1 w - - 5 19", "r3rbk1/1q1n1ppp/p1p1p3/1b6/1nBPPB2/1PN2N1P/3RQPP1/4R1K1 w - - 7 20", "r3rbk1/1q3ppp/pnp1p3/1b6/1nBPPB2/1PN2N1P/3RQPP1/4R2K w - - 9 21", "2r1rbk1/1q3ppp/pnp1p3/1b6/1nBPPB2/1PN2N1P/3RQPP1/1R5K w - - 11 22", "2r1rbk1/1q4pp/pnp1pp2/1b6/1nBPPB2/1PN2N1P/4QPP1/1R1R3K w - - 0 23", "2r1rbk1/5qpp/pnp1pp2/1b6/1nBPP3/1PN1BN1P/4QPP1/1R1R3K w - - 2 24", "2r1rbk1/5qp1/pnp1pp1p/1b6/1nBPP3/1PN1BN1P/4QPP1/1R1R2K1 w - - 0 25", "2r1rbk1/5qp1/pnp1pp1p/1b6/2BPP3/1P2BN1P/n3QPP1/1R1R2K1 w - - 0 26", "r3rbk1/5qp1/pnp1pp1p/1b6/2BPP3/1P2BN1P/Q4PP1/1R1R2K1 w - - 1 27", "rr3bk1/5qp1/pnp1pp1p/1b6/2BPP3/1P2BN1P/Q4PP1/R2R2K1 w - - 3 28", "rr2qbk1/6p1/pnp1pp1p/1b6/2BPP3/1P2BN1P/4QPP1/R2R2K1 w - - 5 29", "rr2qbk1/6p1/1np1pp1p/pb6/2BPP3/1P1QBN1P/5PP1/R2R2K1 w - - 0 30", "rr2qbk1/6p1/1n2pp1p/pp6/3PP3/1P1QBN1P/5PP1/R2R2K1 w - - 0 31", "rr2qbk1/6p1/1n2pp1p/1p1P4/p3P3/1P1QBN1P/5PP1/R2R2K1 w - - 0 32", "rr2qbk1/3n2p1/3Ppp1p/1p6/p3P3/1P1QBN1P/5PP1/R2R2K1 w - - 1 33", "rr3bk1/3n2p1/3Ppp1p/1p5q/pP2P3/3QBN1P/5PP1/R2R2K1 w - - 1 34", "rr3bk1/3n2p1/3Ppp1p/1p5q/1P2P3/p2QBN1P/5PP1/2RR2K1 w - - 0 35", "1r3bk1/3n2p1/r2Ppp1p/1p5q/1P2P3/pQ2BN1P/5PP1/2RR2K1 w - - 2 36", "1r2qbk1/2Rn2p1/r2Ppp1p/1p6/1P2P3/pQ2BN1P/5PP1/3R2K1 w - - 4 37", "1r2qbk1/2Rn2p1/r2Ppp1p/1pB5/1P2P3/1Q3N1P/p4PP1/3R2K1 w - - 0 38", "1r2q1k1/2Rn2p1/r2bpp1p/1pB5/1P2P3/1Q3N1P/p4PP1/R5K1 w - - 0 39", "1r2q1k1/2Rn2p1/3rpp1p/1p6/1P2P3/1Q3N1P/p4PP1/R5K1 w - - 0 40", "2r1q1k1/2Rn2p1/3rpp1p/1p6/1P2P3/5N1P/Q4PP1/R5K1 w - - 1 41", "1r2q1k1/1R1n2p1/3rpp1p/1p6/1P2P3/5N1P/Q4PP1/R5K1 w - - 3 42", "2r1q1k1/2Rn2p1/3rpp1p/1p6/1P2P3/5N1P/Q4PP1/R5K1 w - - 5 43", "1r2q1k1/1R1n2p1/3rpp1p/1p6/1P2P3/5N1P/Q4PP1/R5K1 w - - 7 44", "1rq3k1/R2n2p1/3rpp1p/1p6/1P2P3/5N1P/Q4PP1/R5K1 w - - 9 45", "2q3k1/Rr1n2p1/3rpp1p/1p6/1P2P3/5N1P/4QPP1/R5K1 w - - 11 46", 
"Rrq3k1/3n2p1/3rpp1p/1p6/1P2P3/5N1P/4QPP1/R5K1 w - - 13 47", }, { "rn1qkb1r/1pp2ppp/p4p2/3p1b2/5P2/1P2PN2/P1PP2PP/RN1QKB1R b KQkq - 1 6", "r2qkb1r/1pp2ppp/p1n2p2/3p1b2/3P1P2/1P2PN2/P1P3PP/RN1QKB1R b KQkq - 0 7", "r2qkb1r/1pp2ppp/p4p2/3p1b2/1n1P1P2/1P1BPN2/P1P3PP/RN1QK2R b KQkq - 2 8", "r2qkb1r/1pp2ppp/p4p2/3p1b2/3P1P2/1P1PPN2/P5PP/RN1QK2R b KQkq - 0 9", "r2qk2r/1pp2ppp/p2b1p2/3p1b2/3P1P2/1PNPPN2/P5PP/R2QK2R b KQkq - 2 10", "r2qk2r/1p3ppp/p1pb1p2/3p1b2/3P1P2/1PNPPN2/P5PP/R2Q1RK1 b kq - 1 11", "r2q1rk1/1p3ppp/p1pb1p2/3p1b2/3P1P2/1PNPPN2/P2Q2PP/R4RK1 b - - 3 12", "r2qr1k1/1p3ppp/p1pb1p2/3p1b2/3P1P2/1P1PPN2/P2QN1PP/R4RK1 b - - 5 13", "r3r1k1/1p3ppp/pqpb1p2/3p1b2/3P1P2/1P1PPNN1/P2Q2PP/R4RK1 b - - 7 14", "r3r1k1/1p3ppp/pqp2p2/3p1b2/1b1P1P2/1P1PPNN1/P1Q3PP/R4RK1 b - - 9 15", "r3r1k1/1p1b1ppp/pqp2p2/3p4/1b1P1P2/1P1PPNN1/P4QPP/R4RK1 b - - 11 16", "2r1r1k1/1p1b1ppp/pqp2p2/3p4/1b1PPP2/1P1P1NN1/P4QPP/R4RK1 b - - 0 17", "2r1r1k1/1p1b1ppp/pq3p2/2pp4/1b1PPP2/PP1P1NN1/5QPP/R4RK1 b - - 0 18", "2r1r1k1/1p1b1ppp/pq3p2/2Pp4/4PP2/PPbP1NN1/5QPP/R4RK1 b - - 0 19", "2r1r1k1/1p1b1ppp/p4p2/2Pp4/4PP2/PqbP1NN1/5QPP/RR4K1 b - - 1 20", "2r1r1k1/1p1b1ppp/p4p2/2Pp4/q3PP2/P1bP1NN1/R4QPP/1R4K1 b - - 3 21", "2r1r1k1/1p3ppp/p4p2/1bPP4/q4P2/P1bP1NN1/R4QPP/1R4K1 b - - 0 22", "2r1r1k1/1p3ppp/p4p2/2PP4/q4P2/P1bb1NN1/R4QPP/2R3K1 b - - 1 23", "2r1r1k1/1p3ppp/p2P1p2/2P5/2q2P2/P1bb1NN1/R4QPP/2R3K1 b - - 0 24", "2rr2k1/1p3ppp/p2P1p2/2P5/2q2P2/P1bb1NN1/R4QPP/2R4K b - - 2 25", "2rr2k1/1p3ppp/p2P1p2/2Q5/5P2/P1bb1NN1/R5PP/2R4K b - - 0 26", "3r2k1/1p3ppp/p2P1p2/2r5/5P2/P1bb1N2/R3N1PP/2R4K b - - 1 27", "3r2k1/1p3ppp/p2P1p2/2r5/5P2/P1b2N2/4R1PP/2R4K b - - 0 28", "3r2k1/1p3ppp/p2P1p2/2r5/1b3P2/P4N2/4R1PP/3R3K b - - 2 29", "3r2k1/1p2Rppp/p2P1p2/b1r5/5P2/P4N2/6PP/3R3K b - - 4 30", "3r2k1/1R3ppp/p1rP1p2/b7/5P2/P4N2/6PP/3R3K b - - 0 31", "3r2k1/1R3ppp/p2R1p2/b7/5P2/P4N2/6PP/7K b - - 0 32", "6k1/1R3ppp/p2r1p2/b7/5P2/P4NP1/7P/7K b - - 0 33", "6k1/1R3p1p/p2r1pp1/b7/5P1P/P4NP1/8/7K b - - 0 34", 
"6k1/3R1p1p/pr3pp1/b7/5P1P/P4NP1/8/7K b - - 2 35", "6k1/5p2/pr3pp1/b2R3p/5P1P/P4NP1/8/7K b - - 1 36", "6k1/5p2/pr3pp1/7p/5P1P/P1bR1NP1/8/7K b - - 3 37", "6k1/5p2/p1r2pp1/7p/5P1P/P1bR1NP1/6K1/8 b - - 5 38", "6k1/5p2/p1r2pp1/b2R3p/5P1P/P4NP1/6K1/8 b - - 7 39", "6k1/5p2/p4pp1/b2R3p/5P1P/P4NPK/2r5/8 b - - 9 40", "6k1/2b2p2/p4pp1/7p/5P1P/P2R1NPK/2r5/8 b - - 11 41", "6k1/2b2p2/5pp1/p6p/3N1P1P/P2R2PK/2r5/8 b - - 1 42", "6k1/2b2p2/5pp1/p6p/3N1P1P/P1R3PK/r7/8 b - - 3 43", "6k1/5p2/1b3pp1/p6p/5P1P/P1R3PK/r1N5/8 b - - 5 44", "8/5pk1/1bR2pp1/p6p/5P1P/P5PK/r1N5/8 b - - 7 45", "3b4/5pk1/2R2pp1/p4P1p/7P/P5PK/r1N5/8 b - - 0 46", "8/4bpk1/2R2pp1/p4P1p/6PP/P6K/r1N5/8 b - - 0 47", "8/5pk1/2R2pP1/p6p/6PP/b6K/r1N5/8 b - - 0 48", "8/6k1/2R2pp1/p6P/7P/b6K/r1N5/8 b - - 0 49", "8/6k1/2R2p2/p6p/7P/b5K1/r1N5/8 b - - 1 50", "8/8/2R2pk1/p6p/7P/b4K2/r1N5/8 b - - 3 51", "8/8/2R2pk1/p6p/7P/4NK2/rb6/8 b - - 5 52", "2R5/8/5pk1/7p/p6P/4NK2/rb6/8 b - - 1 53", "6R1/8/5pk1/7p/p6P/4NK2/1b6/r7 b - - 3 54", "R7/5k2/5p2/7p/p6P/4NK2/1b6/r7 b - - 5 55", "R7/5k2/5p2/7p/7P/p3N3/1b2K3/r7 b - - 1 56", "8/R4k2/5p2/7p/7P/p3N3/1b2K3/7r b - - 3 57", "8/8/5pk1/7p/R6P/p3N3/1b2K3/7r b - - 5 58", "8/8/5pk1/7p/R6P/p7/4K3/2bN3r b - - 7 59", "8/8/5pk1/7p/R6P/p7/4KN1r/2b5 b - - 9 60", "8/8/5pk1/7p/R6P/p3K3/1b3N1r/8 b - - 11 61", "8/8/R4pk1/7p/7P/p1b1K3/5N1r/8 b - - 13 62", "8/8/5pk1/7p/7P/2b1K3/R4N1r/8 b - - 0 63", "8/8/5pk1/7p/3K3P/8/R4N1r/4b3 b - - 2 64", } }; // clang-format on } // namespace namespace Stockfish::Benchmark { // Builds a list of UCI commands to be run by bench. There // are five parameters: TT size in MB, number of search threads that // should be used, the limit value spent for each position, a file name // where to look for positions in FEN format, and the type of the limit: // depth, perft, nodes and movetime (in milliseconds). 
Examples: // // bench : search default positions up to depth 13 // bench 64 1 15 : search default positions up to depth 15 (TT = 64MB) // bench 64 1 100000 default nodes : search default positions for 100K nodes each // bench 64 4 5000 current movetime : search current position with 4 threads for 5 sec // bench 16 1 5 blah perft : run a perft 5 on positions in file "blah" std::vector setup_bench(const std::string& currentFen, std::istream& is) { std::vector fens, list; std::string go, token; // Assign default values to missing arguments std::string ttSize = (is >> token) ? token : "16"; std::string threads = (is >> token) ? token : "1"; std::string limit = (is >> token) ? token : "13"; std::string fenFile = (is >> token) ? token : "default"; std::string limitType = (is >> token) ? token : "depth"; go = limitType == "eval" ? "eval" : "go " + limitType + " " + limit; if (fenFile == "default") fens = Defaults; else if (fenFile == "current") fens.push_back(currentFen); else { std::string fen; std::ifstream file(fenFile); if (!file.is_open()) { std::cerr << "Unable to open file " << fenFile << std::endl; exit(EXIT_FAILURE); } while (getline(file, fen)) if (!fen.empty()) fens.push_back(fen); file.close(); } list.emplace_back("setoption name Threads value " + threads); list.emplace_back("setoption name Hash value " + ttSize); list.emplace_back("ucinewgame"); for (const std::string& fen : fens) if (fen.find("setoption") != std::string::npos) list.emplace_back(fen); else { list.emplace_back("position fen " + fen); list.emplace_back(go); } return list; } BenchmarkSetup setup_benchmark(std::istream& is) { // TT_SIZE_PER_THREAD is chosen such that roughly half of the hash is used all positions // for the current sequence have been searched. 
static constexpr int TT_SIZE_PER_THREAD = 128; static constexpr int DEFAULT_DURATION_S = 150; BenchmarkSetup setup{}; // Assign default values to missing arguments int desiredTimeS; if (!(is >> setup.threads)) setup.threads = int(get_hardware_concurrency()); else setup.originalInvocation += std::to_string(setup.threads); if (!(is >> setup.ttSize)) setup.ttSize = TT_SIZE_PER_THREAD * setup.threads; else setup.originalInvocation += " " + std::to_string(setup.ttSize); if (!(is >> desiredTimeS)) desiredTimeS = DEFAULT_DURATION_S; else setup.originalInvocation += " " + std::to_string(desiredTimeS); setup.filledInvocation += std::to_string(setup.threads) + " " + std::to_string(setup.ttSize) + " " + std::to_string(desiredTimeS); auto getCorrectedTime = [&](int ply) { // time per move is fit roughly based on LTC games // seconds = 50/{ply+15} // ms = 50000/{ply+15} // with this fit 10th move gets 2000ms // adjust for desired 10th move time return 50000.0 / (static_cast(ply) + 15.0); }; float totalTime = 0; for (const auto& game : BenchmarkPositions) { int ply = 1; for (int i = 0; i < static_cast(game.size()); ++i) { const float correctedTime = float(getCorrectedTime(ply)); totalTime += correctedTime; ply += 1; } } float timeScaleFactor = static_cast(desiredTimeS * 1000) / totalTime; for (const auto& game : BenchmarkPositions) { setup.commands.emplace_back("ucinewgame"); int ply = 1; for (const std::string& fen : game) { setup.commands.emplace_back("position fen " + fen); const int correctedTime = static_cast(getCorrectedTime(ply) * timeScaleFactor); setup.commands.emplace_back("go movetime " + std::to_string(correctedTime)); ply += 1; } } return setup; } } // namespace Stockfish ================================================ FILE: src/benchmark.h ================================================ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can 
redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. Stockfish is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #ifndef BENCHMARK_H_INCLUDED #define BENCHMARK_H_INCLUDED #include #include #include namespace Stockfish::Benchmark { std::vector setup_bench(const std::string&, std::istream&); struct BenchmarkSetup { int ttSize; int threads; std::vector commands; std::string originalInvocation; std::string filledInvocation; }; BenchmarkSetup setup_benchmark(std::istream&); } // namespace Stockfish #endif // #ifndef BENCHMARK_H_INCLUDED ================================================ FILE: src/bitboard.cpp ================================================ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. Stockfish is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . 
*/ #include "bitboard.h" #include #include #include #include "misc.h" namespace Stockfish { uint8_t PopCnt16[1 << 16]; uint8_t SquareDistance[SQUARE_NB][SQUARE_NB]; Bitboard LineBB[SQUARE_NB][SQUARE_NB]; Bitboard BetweenBB[SQUARE_NB][SQUARE_NB]; Bitboard RayPassBB[SQUARE_NB][SQUARE_NB]; alignas(64) Magic Magics[SQUARE_NB][2]; namespace { Bitboard RookTable[0x19000]; // To store rook attacks Bitboard BishopTable[0x1480]; // To store bishop attacks void init_magics(PieceType pt, Bitboard table[], Magic magics[][2]); } // Returns an ASCII representation of a bitboard suitable // to be printed to standard output. Useful for debugging. std::string Bitboards::pretty(Bitboard b) { std::string s = "+---+---+---+---+---+---+---+---+\n"; for (Rank r = RANK_8;; --r) { for (File f = FILE_A; f <= FILE_H; ++f) s += b & make_square(f, r) ? "| X " : "| "; s += "| " + std::to_string(1 + r) + "\n+---+---+---+---+---+---+---+---+\n"; if (r == RANK_1) break; } s += " a b c d e f g h\n"; return s; } // Initializes various bitboard tables. It is called at // startup and relies on global objects to be already zero-initialized. void Bitboards::init() { for (unsigned i = 0; i < (1 << 16); ++i) PopCnt16[i] = uint8_t(std::bitset<16>(i).count()); for (Square s1 = SQ_A1; s1 <= SQ_H8; ++s1) for (Square s2 = SQ_A1; s2 <= SQ_H8; ++s2) SquareDistance[s1][s2] = std::max(distance(s1, s2), distance(s1, s2)); init_magics(ROOK, RookTable, Magics); init_magics(BISHOP, BishopTable, Magics); for (Square s1 = SQ_A1; s1 <= SQ_H8; ++s1) { for (PieceType pt : {BISHOP, ROOK}) for (Square s2 = SQ_A1; s2 <= SQ_H8; ++s2) { if (PseudoAttacks[pt][s1] & s2) { LineBB[s1][s2] = (attacks_bb(pt, s1, 0) & attacks_bb(pt, s2, 0)) | s1 | s2; BetweenBB[s1][s2] = (attacks_bb(pt, s1, square_bb(s2)) & attacks_bb(pt, s2, square_bb(s1))); RayPassBB[s1][s2] = attacks_bb(pt, s1, 0) & (attacks_bb(pt, s2, square_bb(s1)) | s2); } BetweenBB[s1][s2] |= s2; } } } namespace { // Computes all rook and bishop attacks at startup. 
Magic // bitboards are used to look up attacks of sliding pieces. As a reference see // https://www.chessprogramming.org/Magic_Bitboards. In particular, here we use // the so called "fancy" approach. void init_magics(PieceType pt, Bitboard table[], Magic magics[][2]) { #ifndef USE_PEXT // Optimal PRNG seeds to pick the correct magics in the shortest time int seeds[][RANK_NB] = {{8977, 44560, 54343, 38998, 5731, 95205, 104912, 17020}, {728, 10316, 55013, 32803, 12281, 15100, 16645, 255}}; Bitboard occupancy[4096]; int epoch[4096] = {}, cnt = 0; #endif Bitboard reference[4096]; int size = 0; for (Square s = SQ_A1; s <= SQ_H8; ++s) { // Board edges are not considered in the relevant occupancies Bitboard edges = ((Rank1BB | Rank8BB) & ~rank_bb(s)) | ((FileABB | FileHBB) & ~file_bb(s)); // Given a square 's', the mask is the bitboard of sliding attacks from // 's' computed on an empty board. The index must be big enough to contain // all the attacks for each possible subset of the mask and so is 2 power // the number of 1s of the mask. Hence we deduce the size of the shift to // apply to the 64 or 32 bits word to get the index. Magic& m = magics[s][pt - BISHOP]; m.mask = Bitboards::sliding_attack(pt, s, 0) & ~edges; #ifndef USE_PEXT m.shift = (Is64Bit ? 64 : 32) - popcount(m.mask); #endif // Set the offset for the attacks table of the square. We have individual // table sizes for each square with "Fancy Magic Bitboards". m.attacks = s == SQ_A1 ? table : magics[s - 1][pt - BISHOP].attacks + size; size = 0; // Use Carry-Rippler trick to enumerate all subsets of masks[s] and // store the corresponding sliding attack bitboard in reference[]. 
Bitboard b = 0; do { #ifndef USE_PEXT occupancy[size] = b; #endif reference[size] = Bitboards::sliding_attack(pt, s, b); if (HasPext) m.attacks[pext(b, m.mask)] = reference[size]; size++; b = (b - m.mask) & m.mask; } while (b); #ifndef USE_PEXT PRNG rng(seeds[Is64Bit][rank_of(s)]); // Find a magic for square 's' picking up an (almost) random number // until we find the one that passes the verification test. for (int i = 0; i < size;) { for (m.magic = 0; popcount((m.magic * m.mask) >> 56) < 6;) m.magic = rng.sparse_rand(); // A good magic must map every possible occupancy to an index that // looks up the correct sliding attack in the attacks[s] database. // Note that we build up the database for square 's' as a side // effect of verifying the magic. Keep track of the attempt count // and save it in epoch[], little speed-up trick to avoid resetting // m.attacks[] after every failed attempt. for (++cnt, i = 0; i < size; ++i) { unsigned idx = m.index(occupancy[i]); if (epoch[idx] < cnt) { epoch[idx] = cnt; m.attacks[idx] = reference[i]; } else if (m.attacks[idx] != reference[i]) break; } } #endif } } } } // namespace Stockfish ================================================ FILE: src/bitboard.h ================================================ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. Stockfish is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . 
*/ #ifndef BITBOARD_H_INCLUDED #define BITBOARD_H_INCLUDED #include #include #include #include #include #include #include #include #include #include "types.h" namespace Stockfish { namespace Bitboards { void init(); std::string pretty(Bitboard b); } // namespace Stockfish::Bitboards constexpr Bitboard FileABB = 0x0101010101010101ULL; constexpr Bitboard FileBBB = FileABB << 1; constexpr Bitboard FileCBB = FileABB << 2; constexpr Bitboard FileDBB = FileABB << 3; constexpr Bitboard FileEBB = FileABB << 4; constexpr Bitboard FileFBB = FileABB << 5; constexpr Bitboard FileGBB = FileABB << 6; constexpr Bitboard FileHBB = FileABB << 7; constexpr Bitboard Rank1BB = 0xFF; constexpr Bitboard Rank2BB = Rank1BB << (8 * 1); constexpr Bitboard Rank3BB = Rank1BB << (8 * 2); constexpr Bitboard Rank4BB = Rank1BB << (8 * 3); constexpr Bitboard Rank5BB = Rank1BB << (8 * 4); constexpr Bitboard Rank6BB = Rank1BB << (8 * 5); constexpr Bitboard Rank7BB = Rank1BB << (8 * 6); constexpr Bitboard Rank8BB = Rank1BB << (8 * 7); extern uint8_t PopCnt16[1 << 16]; extern uint8_t SquareDistance[SQUARE_NB][SQUARE_NB]; extern Bitboard BetweenBB[SQUARE_NB][SQUARE_NB]; extern Bitboard LineBB[SQUARE_NB][SQUARE_NB]; extern Bitboard RayPassBB[SQUARE_NB][SQUARE_NB]; // Magic holds all magic bitboards relevant data for a single square struct Magic { Bitboard mask; Bitboard* attacks; #ifndef USE_PEXT Bitboard magic; unsigned shift; #endif // Compute the attack's index using the 'magic bitboards' approach unsigned index(Bitboard occupied) const { #ifdef USE_PEXT return unsigned(pext(occupied, mask)); #else if (Is64Bit) return unsigned(((occupied & mask) * magic) >> shift); unsigned lo = unsigned(occupied) & unsigned(mask); unsigned hi = unsigned(occupied >> 32) & unsigned(mask >> 32); return (lo * unsigned(magic) ^ hi * unsigned(magic >> 32)) >> shift; #endif } Bitboard attacks_bb(Bitboard occupied) const { return attacks[index(occupied)]; } }; extern Magic Magics[SQUARE_NB][2]; constexpr Bitboard 
square_bb(Square s) { assert(is_ok(s)); return 1ULL << s; } // Overloads of bitwise operators between a Bitboard and a Square for testing // whether a given bit is set in a bitboard, and for setting and clearing bits. constexpr Bitboard operator&(Bitboard b, Square s) { return b & square_bb(s); } constexpr Bitboard operator|(Bitboard b, Square s) { return b | square_bb(s); } constexpr Bitboard operator^(Bitboard b, Square s) { return b ^ square_bb(s); } constexpr Bitboard& operator|=(Bitboard& b, Square s) { return b |= square_bb(s); } constexpr Bitboard& operator^=(Bitboard& b, Square s) { return b ^= square_bb(s); } constexpr Bitboard operator&(Square s, Bitboard b) { return b & s; } constexpr Bitboard operator|(Square s, Bitboard b) { return b | s; } constexpr Bitboard operator^(Square s, Bitboard b) { return b ^ s; } constexpr Bitboard operator|(Square s1, Square s2) { return square_bb(s1) | s2; } constexpr bool more_than_one(Bitboard b) { return b & (b - 1); } // rank_bb() and file_bb() return a bitboard representing all the squares on // the given file or rank. constexpr Bitboard rank_bb(Rank r) { return Rank1BB << (8 * r); } constexpr Bitboard rank_bb(Square s) { return rank_bb(rank_of(s)); } constexpr Bitboard file_bb(File f) { return FileABB << f; } constexpr Bitboard file_bb(Square s) { return file_bb(file_of(s)); } // Moves a bitboard one or two steps as specified by the direction D template constexpr Bitboard shift(Bitboard b) { return D == NORTH ? b << 8 : D == SOUTH ? b >> 8 : D == NORTH + NORTH ? b << 16 : D == SOUTH + SOUTH ? b >> 16 : D == EAST ? (b & ~FileHBB) << 1 : D == WEST ? (b & ~FileABB) >> 1 : D == NORTH_EAST ? (b & ~FileHBB) << 9 : D == NORTH_WEST ? (b & ~FileABB) << 7 : D == SOUTH_EAST ? (b & ~FileHBB) >> 7 : D == SOUTH_WEST ? (b & ~FileABB) >> 9 : 0; } // Returns the squares attacked by pawns of the given color // from the squares in the given bitboard. template constexpr Bitboard pawn_attacks_bb(Bitboard b) { return C == WHITE ? 
shift(b) | shift(b) : shift(b) | shift(b); } // Returns a bitboard representing an entire line (from board edge // to board edge) that intersects the two given squares. If the given squares // are not on a same file/rank/diagonal, the function returns 0. For instance, // line_bb(SQ_C4, SQ_F7) will return a bitboard with the A2-G8 diagonal. inline Bitboard line_bb(Square s1, Square s2) { assert(is_ok(s1) && is_ok(s2)); return LineBB[s1][s2]; } // Returns a bitboard representing the squares in the semi-open // segment between the squares s1 and s2 (excluding s1 but including s2). If the // given squares are not on a same file/rank/diagonal, it returns s2. For instance, // between_bb(SQ_C4, SQ_F7) will return a bitboard with squares D5, E6 and F7, but // between_bb(SQ_E6, SQ_F8) will return a bitboard with the square F8. This trick // allows to generate non-king evasion moves faster: the defending piece must either // interpose itself to cover the check or capture the checking piece. inline Bitboard between_bb(Square s1, Square s2) { assert(is_ok(s1) && is_ok(s2)); return BetweenBB[s1][s2]; } // distance() functions return the distance between x and y, defined as the // number of steps for a king in x to reach y. template inline int distance(Square x, Square y); template<> inline int distance(Square x, Square y) { return std::abs(file_of(x) - file_of(y)); } template<> inline int distance(Square x, Square y) { return std::abs(rank_of(x) - rank_of(y)); } template<> inline int distance(Square x, Square y) { return SquareDistance[x][y]; } inline int edge_distance(File f) { return std::min(f, File(FILE_H - f)); } constexpr int constexpr_popcount(Bitboard b) { b = b - ((b >> 1) & 0x5555555555555555ULL); b = (b & 0x3333333333333333ULL) + ((b >> 2) & 0x3333333333333333ULL); b = (b + (b >> 4)) & 0x0F0F0F0F0F0F0F0FULL; return static_cast((b * 0x0101010101010101ULL) >> 56); } // Counts the number of non-zero bits in a bitboard. 
inline int popcount(Bitboard b) {

#ifndef USE_POPCNT

    // Table-driven fallback: view the 64-bit value as four 16-bit words and
    // sum their PopCnt16[] entries.
    std::uint16_t indices[4];
    std::memcpy(indices, &b, sizeof(b));
    return PopCnt16[indices[0]] + PopCnt16[indices[1]] + PopCnt16[indices[2]]
         + PopCnt16[indices[3]];

#elif defined(_MSC_VER)

    return int(_mm_popcnt_u64(b));

#else  // Assumed gcc or compatible compiler

    return __builtin_popcountll(b);

#endif
}

// Returns the least significant bit in a non-zero bitboard.
inline Square lsb(Bitboard b) {
    assert(b);

#if defined(__GNUC__)  // GCC, Clang, ICX

    return Square(__builtin_ctzll(b));

#elif defined(_MSC_VER)
    #ifdef _WIN64  // MSVC, WIN64

    unsigned long idx;
    _BitScanForward64(&idx, b);
    return Square(idx);

    #else  // MSVC, WIN32
    unsigned long idx;

    // Scan the low 32-bit half first; only if it is empty scan the high half
    // and offset the result by 32.
    if (b & 0xffffffff)
    {
        _BitScanForward(&idx, int32_t(b));
        return Square(idx);
    }
    else
    {
        _BitScanForward(&idx, int32_t(b >> 32));
        return Square(idx + 32);
    }
    #endif
#else  // Compiler is neither GCC nor MSVC compatible
    #error "Compiler not supported."
#endif
}

// Returns the most significant bit in a non-zero bitboard.
inline Square msb(Bitboard b) {
    assert(b);

#if defined(__GNUC__)  // GCC, Clang, ICX

    // 63 ^ clz turns the leading-zero count into the index of the highest set bit
    return Square(63 ^ __builtin_clzll(b));

#elif defined(_MSC_VER)
    #ifdef _WIN64  // MSVC, WIN64

    unsigned long idx;
    _BitScanReverse64(&idx, b);
    return Square(idx);

    #else  // MSVC, WIN32

    unsigned long idx;

    // Scan the high 32-bit half first; fall back to the low half when it is empty
    if (b >> 32)
    {
        _BitScanReverse(&idx, int32_t(b >> 32));
        return Square(idx + 32);
    }
    else
    {
        _BitScanReverse(&idx, int32_t(b));
        return Square(idx);
    }
    #endif
#else  // Compiler is neither GCC nor MSVC compatible
    #error "Compiler not supported."
#endif
}

// Returns the bitboard of the least significant
// square of a non-zero bitboard. It is equivalent to square_bb(lsb(bb)).
inline Bitboard least_significant_square_bb(Bitboard b) {
    assert(b);
    return b & -b;
}

// Finds and clears the least significant bit in a non-zero bitboard.
inline Square pop_lsb(Bitboard& b) { assert(b); const Square s = lsb(b); b &= b - 1; return s; } namespace Bitboards { // Returns the bitboard of target square for the given step // from the given square. If the step is off the board, returns empty bitboard. constexpr Bitboard safe_destination(Square s, int step) { constexpr auto abs = [](int v) { return v < 0 ? -v : v; }; Square to = Square(s + step); return is_ok(to) && abs(file_of(s) - file_of(to)) <= 2 ? square_bb(to) : Bitboard(0); } constexpr Bitboard sliding_attack(PieceType pt, Square sq, Bitboard occupied) { Bitboard attacks = 0; Direction RookDirections[4] = {NORTH, SOUTH, EAST, WEST}; Direction BishopDirections[4] = {NORTH_EAST, SOUTH_EAST, SOUTH_WEST, NORTH_WEST}; for (Direction d : (pt == ROOK ? RookDirections : BishopDirections)) { Square s = sq; while (safe_destination(s, d)) { attacks |= (s += d); if (occupied & s) { break; } } } return attacks; } constexpr Bitboard knight_attack(Square sq) { Bitboard b = {}; for (int step : {-17, -15, -10, -6, 6, 10, 15, 17}) b |= safe_destination(sq, step); return b; } constexpr Bitboard king_attack(Square sq) { Bitboard b = {}; for (int step : {-9, -8, -7, -1, 1, 7, 8, 9}) b |= safe_destination(sq, step); return b; } constexpr Bitboard pseudo_attacks(PieceType pt, Square sq) { switch (pt) { case PieceType::ROOK : case PieceType::BISHOP : return sliding_attack(pt, sq, 0); case PieceType::QUEEN : return sliding_attack(PieceType::ROOK, sq, 0) | sliding_attack(PieceType::BISHOP, sq, 0); case PieceType::KNIGHT : return knight_attack(sq); case PieceType::KING : return king_attack(sq); default : assert(false); return 0; } } } inline constexpr auto PseudoAttacks = []() constexpr { std::array, PIECE_TYPE_NB> attacks{}; for (Square s1 = SQ_A1; s1 <= SQ_H8; ++s1) { attacks[WHITE][s1] = pawn_attacks_bb(square_bb(s1)); attacks[BLACK][s1] = pawn_attacks_bb(square_bb(s1)); attacks[KING][s1] = Bitboards::pseudo_attacks(KING, s1); attacks[KNIGHT][s1] = 
Bitboards::pseudo_attacks(KNIGHT, s1); attacks[QUEEN][s1] = attacks[BISHOP][s1] = Bitboards::pseudo_attacks(BISHOP, s1); attacks[QUEEN][s1] |= attacks[ROOK][s1] = Bitboards::pseudo_attacks(ROOK, s1); } return attacks; }(); // Returns the pseudo attacks of the given piece type // assuming an empty board. template inline Bitboard attacks_bb(Square s, Color c = COLOR_NB) { assert((Pt != PAWN || c < COLOR_NB) && is_ok(s)); return Pt == PAWN ? PseudoAttacks[c][s] : PseudoAttacks[Pt][s]; } // Returns the attacks by the given piece // assuming the board is occupied according to the passed Bitboard. // Sliding piece attacks do not continue passed an occupied square. template inline Bitboard attacks_bb(Square s, Bitboard occupied) { assert(Pt != PAWN && is_ok(s)); switch (Pt) { case BISHOP : case ROOK : return Magics[s][Pt - BISHOP].attacks_bb(occupied); case QUEEN : return attacks_bb(s, occupied) | attacks_bb(s, occupied); default : return PseudoAttacks[Pt][s]; } } // Returns the attacks by the given piece // assuming the board is occupied according to the passed Bitboard. // Sliding piece attacks do not continue passed an occupied square. inline Bitboard attacks_bb(PieceType pt, Square s, Bitboard occupied) { assert(pt != PAWN && is_ok(s)); switch (pt) { case BISHOP : return attacks_bb(s, occupied); case ROOK : return attacks_bb(s, occupied); case QUEEN : return attacks_bb(s, occupied) | attacks_bb(s, occupied); default : return PseudoAttacks[pt][s]; } } inline Bitboard attacks_bb(Piece pc, Square s, Bitboard occupied) { return type_of(pc) == PAWN ? 
PseudoAttacks[color_of(pc)][s] : attacks_bb(type_of(pc), s, occupied); } } // namespace Stockfish #endif // #ifndef BITBOARD_H_INCLUDED ================================================ FILE: src/engine.cpp ================================================ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. Stockfish is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #include "engine.h" #include #include #include #include #include #include #include #include #include #include #include "evaluate.h" #include "misc.h" #include "nnue/network.h" #include "nnue/nnue_common.h" #include "nnue/nnue_misc.h" #include "numa.h" #include "perft.h" #include "position.h" #include "search.h" #include "shm.h" #include "syzygy/tbprobe.h" #include "types.h" #include "uci.h" #include "ucioption.h" namespace Stockfish { namespace NN = Eval::NNUE; constexpr auto StartFEN = "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1"; constexpr int MaxHashMB = Is64Bit ? 33554432 : 2048; int MaxThreads = std::max(1024, 4 * int(get_hardware_concurrency())); // The default configuration will attempt to group L3 domains up to 32 threads. // This size was found to be a good balance between the Elo gain of increased // history sharing and the speed loss from more cross-cache accesses (see // PR#6526). The user can always explicitly override this behavior. 
constexpr NumaAutoPolicy DefaultNumaPolicy = BundledL3Policy{32};

// NOTE(review): in this copy of the file everything that was written between
// angle brackets (template arguments on std::optional, std::deque,
// std::function, std::vector, std::unique_ptr, std::make_unique, ...) appears
// to have been stripped. The code tokens below are left exactly as found;
// restore the arguments from the pristine file before compiling.

// Constructs the engine.
//
// The member initialisers derive binaryDirectory from `path` (empty string
// when no path is given), build the NUMA context from the system using
// DefaultNumaPolicy, allocate a state list with one entry and load the default
// networks. The body sets the start position, registers every UCI option
// together with its on-change callback and finally clears the thread pool,
// replicates the networks and (re)creates the threads.
Engine::Engine(std::optional path) :
    binaryDirectory(path ? CommandLine::get_binary_directory(*path) : ""),
    numaContext(NumaConfig::from_system(DefaultNumaPolicy)),
    states(new std::deque(1)),
    threads(),
    networks(numaContext, get_default_networks()) {

    // The second argument is the Chess960 flag (set_position() passes
    // options["UCI_Chess960"] in the same slot).
    pos.set(StartFEN, false, &states->back());

    // Option callbacks return either std::nullopt or a status string built by
    // the *_information_as_string() helpers.
    // NOTE(review): presumably the string is reported back to the user by the
    // option machinery - confirm in ucioption.h.
    // Numeric options look like (default, min, max) - TODO confirm.

    options.add(  //
      "Debug Log File", Option("", [](const Option& o) {
          start_logger(o);
          return std::nullopt;
      }));

    options.add(  //
      "NumaPolicy", Option("auto", [this](const Option& o) {
          set_numa_config_from_option(o);
          return numa_config_information_as_string() + "\n"
               + thread_allocation_information_as_string();
      }));

    options.add(  //
      "Threads", Option(1, 1, MaxThreads, [this](const Option&) {
          resize_threads();
          return thread_allocation_information_as_string();
      }));

    options.add(  //
      "Hash", Option(16, 1, MaxHashMB, [this](const Option& o) {
          set_tt_size(o);
          return std::nullopt;
      }));

    options.add(  //
      "Clear Hash", Option([this](const Option&) {
          search_clear();
          return std::nullopt;
      }));

    options.add(  //
      "Ponder", Option(false));

    options.add(  //
      "MultiPV", Option(1, 1, MAX_MOVES));

    options.add("Skill Level", Option(20, 0, 20));

    options.add("Move Overhead", Option(10, 0, 5000));

    options.add("nodestime", Option(0, 0, 10000));

    options.add("UCI_Chess960", Option(false));

    options.add("UCI_LimitStrength", Option(false));

    options.add("UCI_Elo",
                Option(Stockfish::Search::Skill::LowestElo, Stockfish::Search::Skill::LowestElo,
                       Stockfish::Search::Skill::HighestElo));

    options.add("UCI_ShowWDL", Option(false));

    options.add(  //
      "SyzygyPath", Option("", [](const Option& o) {
          Tablebases::init(o);
          return std::nullopt;
      }));

    options.add("SyzygyProbeDepth", Option(1, 1, 100));

    options.add("Syzygy50MoveRule", Option(true));

    options.add("SyzygyProbeLimit", Option(7, 0, 7));

    options.add(  //
      "EvalFile", Option(EvalFileDefaultNameBig, [this](const Option& o) {
          load_big_network(o);
          return std::nullopt;
      }));

    options.add(  //
      "EvalFileSmall", Option(EvalFileDefaultNameSmall, [this](const Option& o) {
          load_small_network(o);
          return std::nullopt;
      }));

    threads.clear();
    threads.ensure_network_replicated();
    resize_threads();
}

// Verifies the networks, then runs Benchmark::perft on the given FEN and
// returns its result.
std::uint64_t Engine::perft(const std::string& fen, Depth depth, bool isChess960) {
    verify_networks();

    return Benchmark::perft(fen, depth, isChess960);
}

// Starts a search on the current position with the given limits. Perft must
// not be requested through this entry point (see the assert); use perft().
void Engine::go(Search::LimitsType& limits) {
    assert(limits.perft == 0);
    verify_networks();

    threads.start_thinking(options, pos, states, limits);
}

// Requests the running search to stop by raising the pool's stop flag; does
// not wait for the search to finish.
void Engine::stop() { threads.stop = true; }

// Waits for the search to finish, then clears the transposition table and the
// thread pool state and re-initialises the tablebases from "SyzygyPath".
void Engine::search_clear() {
    wait_for_search_finished();

    tt.clear(threads);
    threads.clear();

    // @TODO wont work with multiple instances
    Tablebases::init(options["SyzygyPath"]);  // Free mapped files
}

// The set_on_* functions below take ownership of a callback and store it in
// updateContext (or in onVerifyNetworks for the last one).
void Engine::set_on_update_no_moves(std::function&& f) {
    updateContext.onUpdateNoMoves = std::move(f);
}

void Engine::set_on_update_full(std::function&& f) {
    updateContext.onUpdateFull = std::move(f);
}

void Engine::set_on_iter(std::function&& f) {
    updateContext.onIter = std::move(f);
}

void Engine::set_on_bestmove(std::function&& f) {
    updateContext.onBestmove = std::move(f);
}

void Engine::set_on_verify_networks(std::function&& f) {
    onVerifyNetworks = std::move(f);
}

// Blocks until the main thread reports that the search has finished.
void Engine::wait_for_search_finished() { threads.main_thread()->wait_for_search_finished(); }

// Sets the position from `fen` and then plays `moves` (UCI move strings) on it.
// Returns the error produced by pos.set(), or a PositionSetError naming the
// first move that UCIEngine::to_move() could not resolve, or std::nullopt on
// success.
// Note that the previous state list is replaced before anything is validated,
// and moves preceding an unresolved move have already been played on `pos`
// when the error is returned.
std::optional Engine::set_position(const std::string& fen,
                                   const std::vector& moves) {
    // Drop the old state and create a new one
    states = StateListPtr(new std::deque(1));
    auto err = pos.set(fen, options["UCI_Chess960"], &states->back());
    if (err.has_value())
        return err;

    for (const auto& move : moves)
    {
        auto m = UCIEngine::to_move(pos, move);

        if (m == Move::none())
            return PositionSetError("Illegal move: " + move);

        // Each played move gets its own state entry.
        states->emplace_back();
        pos.do_move(m, states->back());
    }
    return std::nullopt;
}

// modifiers

// Applies the "NumaPolicy" option value:
//   "auto" / "system" - configuration from the system with DefaultNumaPolicy
//   "hardware"        - same, but passing `false` as second argument
//   "none"            - default-constructed NumaConfig
//   anything else     - parsed with NumaConfig::from_string()
// Afterwards the threads are recreated and the networks replicated.
void Engine::set_numa_config_from_option(const std::string& o) {
    if (o == "auto" || o == "system")
    {
        numaContext.set_numa_config(NumaConfig::from_system(DefaultNumaPolicy));
    }
    else if (o == "hardware")
    {
        // Don't respect affinity set in the system.
        numaContext.set_numa_config(NumaConfig::from_system(DefaultNumaPolicy, false));
    }
    else if (o == "none")
    {
        numaContext.set_numa_config(NumaConfig{});
    }
    else
    {
        numaContext.set_numa_config(NumaConfig::from_string(o));
    }

    // Force reallocation of threads in case affinities need to change.
    resize_threads();
    // NOTE(review): resize_threads() already ends with this same call.
    threads.ensure_network_replicated();
}

// Waits for any running search, rebuilds the thread pool for the current NUMA
// configuration, resizes the hash for the new pool and replicates the networks.
void Engine::resize_threads() {
    threads.wait_for_search_finished();
    threads.set(numaContext.get_numa_config(), {options, threads, tt, sharedHists, networks},
                updateContext);

    // Reallocate the hash with the new threadpool size
    set_tt_size(options["Hash"]);
    threads.ensure_network_replicated();
}

// Waits for any running search, then resizes the transposition table to `mb`
// (the value of the "Hash" option).
void Engine::set_tt_size(size_t mb) {
    wait_for_search_finished();
    tt.resize(mb, threads);
}

// Writes `b` to the main search manager's ponder flag.
void Engine::set_ponderhit(bool b) { threads.main_manager()->ponder = b; }

// network related

// Verifies both networks against the "EvalFile"/"EvalFileSmall" options and
// then reports, through onVerifyNetworks, one line per network replica with
// its allocation status and, when present, the associated error text.
void Engine::verify_networks() const {
    networks->big.verify(options["EvalFile"], onVerifyNetworks);
    networks->small.verify(options["EvalFileSmall"], onVerifyNetworks);

    auto statuses = networks.get_status_and_errors();
    for (size_t i = 0; i < statuses.size(); ++i)
    {
        const auto [status, error] = statuses[i];
        // Replicas are numbered from 1 in the message.
        std::string message = "Network replica " + std::to_string(i + 1) + ": ";
        if (status == SystemWideSharedConstantAllocationStatus::NoAllocation)
        {
            message += "No allocation.";
        }
        else if (status == SystemWideSharedConstantAllocationStatus::LocalMemory)
        {
            message += "Local memory.";
        }
        else if (status == SystemWideSharedConstantAllocationStatus::SharedMemory)
        {
            message += "Shared memory.";
        }
        else
        {
            message += "Unknown status.";
        }

        if (error.has_value())
        {
            message += " " + *error;
        }

        onVerifyNetworks(message);
    }
}

// Creates the network pair with the default file names and loads both, using
// binaryDirectory as the first load() argument and an empty file name as the
// second.
std::unique_ptr Engine::get_default_networks() const {

    auto networks_ =
      std::make_unique(NN::EvalFile{EvalFileDefaultNameBig, "None", ""},
                       NN::EvalFile{EvalFileDefaultNameSmall, "None", ""});

    networks_->big.load(binaryDirectory, "");
    networks_->small.load(binaryDirectory, "");
    return networks_;
}
// NOTE(review): template arguments (the text between angle brackets) appear
// to have been stripped from this copy of the file, e.g. in the
// save_network() parameter and the std::vector return types below. The code
// tokens are left exactly as found.

// Loads the big network from `file` (with binaryDirectory as first load()
// argument) on the replicated networks, then clears the thread pool state and
// makes sure the networks are replicated.
void Engine::load_big_network(const std::string& file) {
    networks.modify_and_replicate(
      [this, &file](NN::Networks& networks_) { networks_.big.load(binaryDirectory, file); });
    threads.clear();
    threads.ensure_network_replicated();
}

// Same as load_big_network(), for the small network.
void Engine::load_small_network(const std::string& file) {
    networks.modify_and_replicate(
      [this, &file](NN::Networks& networks_) { networks_.small.load(binaryDirectory, file); });
    threads.clear();
    threads.ensure_network_replicated();
}

// Saves the big network using files[0].first and the small network using
// files[1].first. The `.second` members are not read here.
void Engine::save_network(const std::pair, std::string> files[2]) {
    networks.modify_and_replicate([&files](NN::Networks& networks_) {
        networks_.big.save(files[0].first);
        networks_.small.save(files[1].first);
    });
}

// utility functions

// Prints the evaluation trace of the current position to sync_cout. The trace
// runs on a local Position rebuilt from the current FEN with its own state
// list, so the engine's own `pos` is not passed to Eval::trace().
void Engine::trace_eval() const {
    StateListPtr trace_states(new std::deque(1));
    Position     p;
    p.set(pos.fen(), options["UCI_Chess960"], &trace_states->back());

    verify_networks();

    sync_cout << "\n" << Eval::trace(p, *networks) << sync_endl;
}

// Read-only and mutable access to the option map.
const OptionsMap& Engine::get_options() const { return options; }
OptionsMap&       Engine::get_options() { return options; }

// FEN string of the current position.
std::string Engine::fen() const { return pos.fen(); }

// Forwards to Position::flip() on the current position.
void Engine::flip() { pos.flip(); }

// Returns the current position streamed through operator<< as a string.
std::string Engine::visualize() const {
    std::stringstream ss;
    ss << pos;
    return ss.str();
}

// Forwards to TranspositionTable::hashfull() with the given maxAge.
int Engine::get_hashfull(int maxAge) const { return tt.hashfull(maxAge); }

// Returns one (bound thread count, cpus in node) pair per NUMA node.
// Nodes for which the pool reports no count get a count of 0, but only when
// the pool reported at least one count; if it reported none the result is
// empty.
std::vector> Engine::get_bound_thread_count_by_numa_node() const {
    auto              counts = threads.get_bound_thread_count_by_numa_node();
    const NumaConfig& cfg    = numaContext.get_numa_config();
    std::vector>      ratios;
    NumaIndex         n = 0;
    for (; n < counts.size(); ++n)
        ratios.emplace_back(counts[n], cfg.num_cpus_in_numa_node(n));
    // `n` continues from where the first loop stopped.
    if (!counts.empty())
        for (; n < cfg.num_numa_nodes(); ++n)
            ratios.emplace_back(0, cfg.num_cpus_in_numa_node(n));
    return ratios;
}

// String form of the active NUMA configuration.
std::string Engine::get_numa_config_as_string() const {
    return numaContext.get_numa_config().to_string();
}

// "Available processors: " followed by the NUMA configuration string.
std::string Engine::numa_config_information_as_string() const {
    auto cfgStr = get_numa_config_as_string();
    return "Available processors: " + cfgStr;
}

// Formats the per-node binding information as "current/total" entries joined
// by ':'. Returns an empty string when there is no binding information.
std::string Engine::thread_binding_information_as_string() const {
    auto              boundThreadsByNode = get_bound_thread_count_by_numa_node();
    std::stringstream ss;
    if (boundThreadsByNode.empty())
        return ss.str();

    bool isFirst = true;

    for (auto&& [current, total] : boundThreadsByNode)
    {
        if (!isFirst)
            ss << ":";
        ss << current << "/" << total;
        isFirst = false;
    }

    return ss.str();
}

// "Using N thread(s)", followed by the NUMA binding summary when
// thread_binding_information_as_string() is non-empty.
std::string Engine::thread_allocation_information_as_string() const {
    std::stringstream ss;

    size_t threadsSize = threads.size();
    ss << "Using " << threadsSize << (threadsSize > 1 ? " threads" : " thread");

    auto boundThreadsByNodeStr = thread_binding_information_as_string();
    if (boundThreadsByNodeStr.empty())
        return ss.str();

    ss << " with NUMA node thread binding: ";
    ss << boundThreadsByNodeStr;
    return ss.str();
}
}

================================================
FILE: src/engine.h
================================================
/*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
  Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.

  Stockfish is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program. If not, see .
*/

#ifndef ENGINE_H_INCLUDED
#define ENGINE_H_INCLUDED

// NOTE(review): the targets of the system includes below and all template
// arguments in this header (the text between angle brackets) appear to have
// been stripped from this copy of the file. Tokens are left exactly as found.
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

#include "history.h"
#include "nnue/network.h"
#include "numa.h"
#include "position.h"
#include "search.h"
#include "syzygy/tbprobe.h"  // for Stockfish::Depth
#include "thread.h"
#include "tt.h"
#include "ucioption.h"

namespace Stockfish {

// Engine bundles the current position, the UCI options, the thread pool, the
// transposition table and the NNUE networks behind one interface: set a
// position, start/stop a search, change settings, query state.
class Engine {
   public:
    using InfoShort = Search::InfoShort;
    using InfoFull  = Search::InfoFull;
    using InfoIter  = Search::InfoIteration;

    // `path`, when given, is turned into the binary directory via
    // CommandLine::get_binary_directory() and used when loading networks.
    Engine(std::optional path = std::nullopt);

    // Cannot be movable due to components holding backreferences to fields
    Engine(const Engine&)            = delete;
    Engine(Engine&&)                 = delete;
    Engine& operator=(const Engine&) = delete;
    Engine& operator=(Engine&&)      = delete;

    // Waits for a running search before the members are destroyed.
    ~Engine() { wait_for_search_finished(); }

    // Verifies the networks and runs a perft on `fen` to the given depth.
    std::uint64_t perft(const std::string& fen, Depth depth, bool isChess960);

    // non blocking call to start searching
    void go(Search::LimitsType&);
    // non blocking call to stop searching
    void stop();

    // blocking call to wait for search to finish
    void wait_for_search_finished();
    // set a new position, moves are in UCI format
    std::optional set_position(const std::string& fen, const std::vector& moves);

    // modifiers

    // Accepts "auto", "system", "hardware", "none" or a string parsed by
    // NumaConfig::from_string(); recreates the threads afterwards.
    void set_numa_config_from_option(const std::string& o);
    void resize_threads();
    void set_tt_size(size_t mb);
    void set_ponderhit(bool);
    void search_clear();

    // Callback registration; each callback is moved into the engine.
    void set_on_update_no_moves(std::function&&);
    void set_on_update_full(std::function&&);
    void set_on_iter(std::function&&);
    void set_on_bestmove(std::function&&);
    void set_on_verify_networks(std::function&&);

    // network related

    void            verify_networks() const;
    std::unique_ptr get_default_networks() const;
    void            load_big_network(const std::string& file);
    void            load_small_network(const std::string& file);
    // files[0] is used for the big network, files[1] for the small one.
    void save_network(const std::pair, std::string> files[2]);

    // utility functions

    void trace_eval() const;

    const OptionsMap& get_options() const;
    OptionsMap&       get_options();

    int get_hashfull(int maxAge = 0) const;

    std::string   fen() const;
    void          flip();
    std::string   visualize() const;
    // One (bound thread count, cpus in node) pair per NUMA node.
    std::vector>  get_bound_thread_count_by_numa_node() const;
    std::string   get_numa_config_as_string() const;
    std::string   numa_config_information_as_string() const;
    std::string   thread_allocation_information_as_string() const;
    std::string   thread_binding_information_as_string() const;

   private:
    // First argument of every network load() call.
    const std::string binaryDirectory;

    NumaReplicationContext numaContext;

    // Current position and the state list backing it.
    Position     pos;
    StateListPtr states;

    OptionsMap                           options;
    ThreadPool                           threads;
    TranspositionTable                   tt;
    LazyNumaReplicatedSystemWide         networks;

    // Search callbacks and the callback used by verify_networks().
    Search::SearchManager::UpdateContext updateContext;
    std::function                        onVerifyNetworks;
    // Handed to the thread pool in resize_threads().
    std::map                             sharedHists;
};

}  // namespace Stockfish


#endif  // #ifndef ENGINE_H_INCLUDED

================================================
FILE: src/evaluate.cpp
================================================
/*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
  Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.

  Stockfish is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program. If not, see .
*/

#include "evaluate.h"

#include
#include
#include
#include
#include
#include
#include
#include

#include "nnue/network.h"
#include "nnue/nnue_misc.h"
#include "position.h"
#include "types.h"
#include "uci.h"
#include "nnue/nnue_accumulator.h"

namespace Stockfish {

// Returns a static, purely materialistic evaluation of the position from
// the point of view of the side to move.
// It can be divided by PawnValue to get
// an approximation of the material advantage on the board in terms of pawns.
// NOTE(review): the template arguments of the pos.count calls in this file
// (the text between angle brackets) appear to have been stripped from this
// copy; tokens are left exactly as found.
int Eval::simple_eval(const Position& pos) {
    Color c = pos.side_to_move();
    return PawnValue * (pos.count(c) - pos.count(~c)) + pos.non_pawn_material(c)
         - pos.non_pawn_material(~c);
}

// Returns true when the absolute value of simple_eval() exceeds the threshold
// below; evaluate() uses the result to choose the small network first.
bool Eval::use_smallnet(const Position& pos) { return std::abs(simple_eval(pos)) > 962; }

// Evaluate is the evaluator for the outer world. It returns a static evaluation
// of the position from the point of view of the side to move.
// Must not be called when the side to move is in check (see the assert).
// `optimism` is passed by value and scaled locally before being blended in.
Value Eval::evaluate(const Eval::NNUE::Networks&    networks,
                     const Position&                pos,
                     Eval::NNUE::AccumulatorStack&  accumulators,
                     Eval::NNUE::AccumulatorCaches& caches,
                     int                            optimism) {

    assert(!pos.checkers());

    bool smallNet           = use_smallnet(pos);
    auto [psqt, positional] = smallNet ? networks.small.evaluate(pos, accumulators, caches.small)
                                       : networks.big.evaluate(pos, accumulators, caches.big);

    // Weighted combination of the two network outputs.
    Value nnue = (125 * psqt + 131 * positional) / 128;

    // Re-evaluate the position when higher eval accuracy is worth the time spent
    if (smallNet && (std::abs(nnue) < 277))
    {
        std::tie(psqt, positional) = networks.big.evaluate(pos, accumulators, caches.big);
        nnue                       = (125 * psqt + 131 * positional) / 128;
        // NOTE(review): smallNet is not read again after this assignment.
        smallNet                   = false;
    }

    // Blend optimism and eval with nnue complexity
    int nnueComplexity = std::abs(psqt - positional);
    optimism += optimism * nnueComplexity / 476;
    nnue -= nnue * nnueComplexity / 18236;

    // Both terms are scaled up with the amount of material on the board.
    int material = 534 * pos.count() + pos.non_pawn_material();
    int v        = (nnue * (77871 + material) + optimism * (7191 + material)) / 77871;

    // Damp down the evaluation linearly when shuffling
    v -= v * pos.rule50_count() / 199;

    // Guarantee evaluation does not hit the tablebase range
    v = std::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1);

    return v;
}

// Like evaluate(), but instead of returning a value, it returns
// a string (suitable for outputting to stdout) that contains the detailed
// descriptions and values of each evaluation term.
// Useful for debugging.
// Trace scores are from white's point of view
// When the side to move is in check no evaluation is produced and a fixed
// message is returned instead.
// NOTE(review): the template arguments of the std::make_unique calls (the
// text between angle brackets) appear to have been stripped from this copy;
// tokens are left exactly as found.
std::string Eval::trace(Position& pos, const Eval::NNUE::Networks& networks) {

    if (pos.checkers())
        return "Final evaluation: none (in check)";

    // Fresh accumulators and caches are created for this call only.
    auto accumulators = std::make_unique();
    auto caches       = std::make_unique(networks);

    std::stringstream ss;
    ss << std::showpoint << std::noshowpos << std::fixed << std::setprecision(2);
    ss << '\n' << NNUE::trace(pos, networks, *caches) << '\n';

    // From here on numbers are printed with an explicit sign.
    ss << std::showpoint << std::showpos << std::fixed << std::setprecision(2) << std::setw(15);

    // Raw big-network output: plain sum of the two components.
    auto [psqt, positional] = networks.big.evaluate(pos, *accumulators, caches->big);
    Value v                 = psqt + positional;
    // Convert from side-to-move to white's point of view.
    v                       = pos.side_to_move() == WHITE ? v : -v;
    ss << "NNUE evaluation " << 0.01 * UCIEngine::to_cp(v, pos) << " (white side)\n";

    // Full evaluation as returned by evaluate() with zero optimism.
    v = evaluate(networks, pos, *accumulators, *caches, VALUE_ZERO);
    v = pos.side_to_move() == WHITE ? v : -v;
    ss << "Final evaluation " << 0.01 * UCIEngine::to_cp(v, pos) << " (white side)";
    ss << " [with scaled NNUE, ...]";
    ss << "\n";

    return ss.str();
}

}  // namespace Stockfish

================================================
FILE: src/evaluate.h
================================================
/*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
  Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.

  Stockfish is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program. If not, see .
*/

#ifndef EVALUATE_H_INCLUDED
#define EVALUATE_H_INCLUDED

// NOTE(review): the target of this system include appears to have been
// stripped from this copy of the file.
#include

#include "types.h"

namespace Stockfish {

class Position;

namespace Eval {

// The default net name MUST follow the format nn-[SHA256 first 12 digits].nnue
// for the build process (profile-build and fishtest) to work. Do not change the
// name of the macro or the location where this macro is defined, as it is used
// in the Makefile/Fishtest.
#define EvalFileDefaultNameBig "nn-9a0cc2a62c52.nnue"
#define EvalFileDefaultNameSmall "nn-47fc8b7fff06.nnue"

// Forward declarations so that this header does not need the NNUE headers.
namespace NNUE {
struct Networks;
struct AccumulatorCaches;
class AccumulatorStack;
}

// Human-readable evaluation breakdown of `pos`, from white's point of view.
std::string trace(Position& pos, const Eval::NNUE::Networks& networks);

// Material-only evaluation from the side to move's point of view.
int         simple_eval(const Position& pos);
// Whether evaluate() should try the small network first for `pos`.
bool        use_smallnet(const Position& pos);
// Static evaluation of `pos` from the side to move's point of view.
Value       evaluate(const NNUE::Networks&          networks,
                     const Position&                pos,
                     Eval::NNUE::AccumulatorStack&  accumulators,
                     Eval::NNUE::AccumulatorCaches& caches,
                     int                            optimism);
}  // namespace Eval

}  // namespace Stockfish

#endif  // #ifndef EVALUATE_H_INCLUDED

================================================
FILE: src/history.h
================================================
/*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
  Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.

  Stockfish is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program. If not, see .
*/

#ifndef HISTORY_H_INCLUDED
#define HISTORY_H_INCLUDED

// NOTE(review): the targets of the system includes below and all template
// parameter/argument lists in this header (the text between angle brackets)
// appear to have been stripped from this copy of the file. Tokens are left
// exactly as found.
#include
#include
#include
#include
#include
#include
#include
#include
#include  // IWYU pragma: keep

#include "memory.h"
#include "misc.h"
#include "position.h"

namespace Stockfish {

constexpr int PAWN_HISTORY_BASE_SIZE   = 8192;  // has to be a power of 2
constexpr int UINT_16_HISTORY_SIZE     = std::numeric_limits::max() + 1;
constexpr int CORRHIST_BASE_SIZE       = UINT_16_HISTORY_SIZE;
constexpr int CORRECTION_HISTORY_LIMIT = 1024;
constexpr int LOW_PLY_HISTORY_SIZE     = 5;

static_assert((PAWN_HISTORY_BASE_SIZE & (PAWN_HISTORY_BASE_SIZE - 1)) == 0,
              "PAWN_HISTORY_BASE_SIZE has to be a power of 2");

static_assert((CORRHIST_BASE_SIZE & (CORRHIST_BASE_SIZE - 1)) == 0,
              "CORRHIST_BASE_SIZE has to be a power of 2");

// StatsEntry is the container of various numerical statistics. We use a class
// instead of a naked value to directly call history update operator<<() on
// the entry. The first template parameter T is the base type of the array,
// and the second template parameter D limits the range of updates in [-D, D]
// when we update values with the << operator
// The body additionally refers to a compile-time flag named Atomic: when it
// is set, the value is read and written with relaxed atomic load/store.
template
struct StatsEntry {
    static_assert(std::is_arithmetic_v, "Not an arithmetic type");

   private:
    std::conditional_t, T> entry;

   public:
    // Stores `v`; returns nothing, so assignments cannot be chained.
    void operator=(const T& v) {
        if constexpr (Atomic)
            entry.store(v, std::memory_order_relaxed);
        else
            entry = v;
    }

    // Implicit read of the stored value.
    operator T() const {
        if constexpr (Atomic)
            return entry.load(std::memory_order_relaxed);
        else
            return entry;
    }

    // History update: adds the clamped bonus and subtracts a share of the
    // current value proportional to |bonus| / D, which keeps the result
    // within [-D, D] (checked by the assert).
    // The update is performed as a separate read (T val = *this) followed by
    // a separate write, also in the Atomic case.
    void operator<<(int bonus) {
        // Make sure that bonus is in range [-D, D]
        int clampedBonus = std::clamp(bonus, -D, D);
        T   val          = *this;
        *this            = val + clampedBonus - val * std::abs(clampedBonus) / D;

        assert(std::abs(T(*this)) <= D);
    }
};

enum StatsType {
    NoCaptures,
    Captures
};

// Multi-dimensional arrays of StatsEntry values.
template
using Stats = MultiArray, Sizes...>;

template
using AtomicStats = MultiArray, Sizes...>;

// DynStats is a dynamically sized array of Stats, used for thread-shared histories
// which should scale with the total number of threads.
// The SizeMultiplier gives
// the per-thread allocation count of T.
// NOTE(review): template parameter/argument lists (the text between angle
// brackets) appear to have been stripped from this copy of the file; tokens
// are left exactly as found.
template
struct DynStats {
    // Allocates s * SizeMultiplier elements through make_unique_large_page().
    explicit DynStats(size_t s) {
        size = s * SizeMultiplier;
        data = make_unique_large_page(size);
    }
    // Fills one slice of the array with `value`. The array is divided into
    // `numaTotal` consecutive slices and slice number `threadIdx` is filled;
    // the last slice always extends to the end of the array so that no
    // element is left out by the integer division.
    void clear_range(int value, size_t threadIdx, size_t numaTotal) {
        // 64-bit intermediate for the product threadIdx * size.
        size_t start = uint64_t(threadIdx) * size / numaTotal;
        assert(start < size);
        size_t end = threadIdx + 1 == numaTotal ? size : uint64_t(threadIdx + 1) * size / numaTotal;

        while (start < end)
            data[start++].fill(value);
    }
    // Total number of elements (s * SizeMultiplier).
    size_t get_size() const { return size; }
    // Element access; the index is only checked by assert.
    T&     operator[](size_t index) {
        assert(index < size);
        return data.get()[index];
    }
    const T& operator[](size_t index) const {
        assert(index < size);
        return data.get()[index];
    }

   private:
    size_t       size;
    LargePagePtr data;
};

// ButterflyHistory records how often quiet moves have been successful or unsuccessful
// during the current search, and is used for reduction and move ordering decisions.
// It uses 2 tables (one for each color) indexed by the move's from and to squares,
// see https://www.chessprogramming.org/Butterfly_Boards
using ButterflyHistory = Stats;

// LowPlyHistory is addressed by ply and move's from and to squares, used
// to improve move ordering near the root
using LowPlyHistory = Stats;

// CapturePieceToHistory is addressed by a move's [piece][to][captured piece type]
using CapturePieceToHistory = Stats;

// PieceToHistory is like ButterflyHistory but is addressed by a move's [piece][to]
using PieceToHistory = Stats;

// ContinuationHistory is the combined history of a given pair of moves, usually
// the current one given a previous one. The nested history table is based on
// PieceToHistory instead of ButterflyBoards.
using ContinuationHistory = MultiArray;

// PawnHistory is addressed by the pawn structure and a move's [piece][to]
using PawnHistory = DynStats, PAWN_HISTORY_BASE_SIZE>;

// Correction histories record differences between the static evaluation of
// positions and their search score.
// It is used to improve the static evaluation
// used by some search heuristics.
// see https://www.chessprogramming.org/Static_Evaluation_Correction_History
enum CorrHistType {
    Pawn,          // By color and pawn structure
    Minor,         // By color and positions of minor pieces (Knight, Bishop)
    NonPawn,       // By non-pawn material positions and color
    PieceTo,       // By [piece][to] move
    Continuation,  // Combined history of move pairs
};

// NOTE(review): template parameter/argument lists (the text between angle
// brackets) appear to have been stripped from this copy of the file, which is
// why the CorrHistTypedef specialisations below look identical. Tokens are
// left exactly as found.

// Groups the four correction values that are stored together in one entry:
// pawn, minor and one non-pawn value per colour.
template
struct CorrectionBundle {
    StatsEntry pawn;
    StatsEntry minor;
    StatsEntry nonPawnWhite;
    StatsEntry nonPawnBlack;

    // Sets all four values to `val`.
    void operator=(T val) {
        pawn         = val;
        minor        = val;
        nonPawnWhite = val;
        nonPawnBlack = val;
    }
};

namespace Detail {

// Maps a correction history kind to its table type: a primary template plus
// three explicit specialisations.
template
struct CorrHistTypedef {
    using type = DynStats, CORRHIST_BASE_SIZE>;
};

template<>
struct CorrHistTypedef {
    using type = Stats;
};

template<>
struct CorrHistTypedef {
    using type = MultiArray::type, PIECE_NB, SQUARE_NB>;
};

template<>
struct CorrHistTypedef {
    using type = DynStats, CORRHIST_BASE_SIZE>;
};

}

// Table type of SharedHistories::correctionHistory.
using UnifiedCorrectionHistory =
  DynStats, COLOR_NB>, CORRHIST_BASE_SIZE>;

// Convenience alias selecting the table type through Detail::CorrHistTypedef.
template
using CorrectionHistory = typename Detail::CorrHistTypedef::type;

using TTMoveHistory = StatsEntry;

// Set of histories shared between groups of threads. To avoid excessive
// cross-node data transfer, histories are shared only between threads
// on a given NUMA node. The passed size must be a power of two to make
// the indexing more efficient.
struct SharedHistories {
    // `threadCount` must be a non-zero power of two (asserted). Both tables
    // are constructed with it, and the (size - 1) values are cached for use
    // as index masks in the accessors below.
    // NOTE(review): masking with (size - 1) presumes the resulting table
    // sizes are powers of two as well - confirm against DynStats and the
    // *_BASE_SIZE constants.
    SharedHistories(size_t threadCount) :
        correctionHistory(threadCount),
        pawnHistory(threadCount) {
        assert((threadCount & (threadCount - 1)) == 0 && threadCount != 0);
        sizeMinus1         = correctionHistory.get_size() - 1;
        pawnHistSizeMinus1 = pawnHistory.get_size() - 1;
    }

    // Number of entries in the correction history table.
    size_t get_size() const { return sizeMinus1 + 1; }

    // Pawn history entry selected by the position's pawn key.
    auto& pawn_entry(const Position& pos) {
        return pawnHistory[pos.pawn_key() & pawnHistSizeMinus1];
    }
    const auto& pawn_entry(const Position& pos) const {
        return pawnHistory[pos.pawn_key() & pawnHistSizeMinus1];
    }

    // The accessors below all index the same correctionHistory table and
    // differ only in the key used: pawn key, minor piece key, or the non-pawn
    // key of colour `c`.
    auto& pawn_correction_entry(const Position& pos) {
        return correctionHistory[pos.pawn_key() & sizeMinus1];
    }
    const auto& pawn_correction_entry(const Position& pos) const {
        return correctionHistory[pos.pawn_key() & sizeMinus1];
    }

    auto& minor_piece_correction_entry(const Position& pos) {
        return correctionHistory[pos.minor_piece_key() & sizeMinus1];
    }
    const auto& minor_piece_correction_entry(const Position& pos) const {
        return correctionHistory[pos.minor_piece_key() & sizeMinus1];
    }

    // NOTE(review): the template parameter list (presumably the colour `c`)
    // appears to have been stripped from this copy of the file.
    template
    auto& nonpawn_correction_entry(const Position& pos) {
        return correctionHistory[pos.non_pawn_key(c) & sizeMinus1];
    }
    template
    const auto& nonpawn_correction_entry(const Position& pos) const {
        return correctionHistory[pos.non_pawn_key(c) & sizeMinus1];
    }

    UnifiedCorrectionHistory correctionHistory;
    PawnHistory              pawnHistory;


   private:
    // Index masks: table size minus one.
    size_t sizeMinus1, pawnHistSizeMinus1;
};

}  // namespace Stockfish

#endif  // #ifndef HISTORY_H_INCLUDED

================================================
FILE: src/incbin/UNLICENCE
================================================
The file "incbin.h" is free and unencumbered software released into
the public domain by Dale Weiler, see:

Anyone is free to copy, modify, publish, use, compile, sell, or
distribute this software, either in source code form or as a compiled
binary, for any purpose, commercial or non-commercial, and by any
means.
In jurisdictions that recognize copyright laws, the author or authors of this software dedicate any and all copyright interest in the software to the public domain. We make this dedication for the benefit of the public at large and to the detriment of our heirs and successors. We intend this dedication to be an overt act of relinquishment in perpetuity of all present and future rights to this software under copyright law. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. For more information, please refer to ================================================ FILE: src/incbin/incbin.h ================================================ /** * @file incbin.h * @author Dale Weiler * @brief Utility for including binary files * * Facilities for including binary files into the current translation unit and * making use from them externally in other translation units. 
*/ #ifndef INCBIN_HDR #define INCBIN_HDR #include #if defined(__AVX512BW__) || \ defined(__AVX512CD__) || \ defined(__AVX512DQ__) || \ defined(__AVX512ER__) || \ defined(__AVX512PF__) || \ defined(__AVX512VL__) || \ defined(__AVX512F__) # define INCBIN_ALIGNMENT_INDEX 6 #elif defined(__AVX__) || \ defined(__AVX2__) # define INCBIN_ALIGNMENT_INDEX 5 #elif defined(__SSE__) || \ defined(__SSE2__) || \ defined(__SSE3__) || \ defined(__SSSE3__) || \ defined(__SSE4_1__) || \ defined(__SSE4_2__) || \ defined(__neon__) || \ defined(__ARM_NEON) || \ defined(__ALTIVEC__) # define INCBIN_ALIGNMENT_INDEX 4 #elif ULONG_MAX != 0xffffffffu # define INCBIN_ALIGNMENT_INDEX 3 # else # define INCBIN_ALIGNMENT_INDEX 2 #endif /* Lookup table of (1 << n) where `n' is `INCBIN_ALIGNMENT_INDEX' */ #define INCBIN_ALIGN_SHIFT_0 1 #define INCBIN_ALIGN_SHIFT_1 2 #define INCBIN_ALIGN_SHIFT_2 4 #define INCBIN_ALIGN_SHIFT_3 8 #define INCBIN_ALIGN_SHIFT_4 16 #define INCBIN_ALIGN_SHIFT_5 32 #define INCBIN_ALIGN_SHIFT_6 64 /* Actual alignment value */ #define INCBIN_ALIGNMENT \ INCBIN_CONCATENATE( \ INCBIN_CONCATENATE(INCBIN_ALIGN_SHIFT, _), \ INCBIN_ALIGNMENT_INDEX) /* Stringize */ #define INCBIN_STR(X) \ #X #define INCBIN_STRINGIZE(X) \ INCBIN_STR(X) /* Concatenate */ #define INCBIN_CAT(X, Y) \ X ## Y #define INCBIN_CONCATENATE(X, Y) \ INCBIN_CAT(X, Y) /* Deferred macro expansion */ #define INCBIN_EVAL(X) \ X #define INCBIN_INVOKE(N, ...) \ INCBIN_EVAL(N(__VA_ARGS__)) /* Variable argument count for overloading by arity */ #define INCBIN_VA_ARG_COUNTER(_1, _2, _3, N, ...) N #define INCBIN_VA_ARGC(...) 
INCBIN_VA_ARG_COUNTER(__VA_ARGS__, 3, 2, 1, 0)

/* Green Hills uses a different directive for including binary data */
#if defined(__ghs__)
# if (__ghs_asm == 2)
# define INCBIN_MACRO ".file"
/* Or consider the ".myrawdata" entry in the ld file */
# else
# define INCBIN_MACRO "\tINCBIN"
# endif
#else
# define INCBIN_MACRO ".incbin"
#endif

/* Alignment attribute applied to the C declarations of the data symbols
 * (see INCBIN_EXTERN_2). */
#ifndef _MSC_VER
# define INCBIN_ALIGN \
    __attribute__((aligned(INCBIN_ALIGNMENT)))
#else
# define INCBIN_ALIGN __declspec(align(INCBIN_ALIGNMENT))
#endif

#if defined(__arm__) || /* GNU C and RealView */ \
    defined(__arm) || /* Diab */ \
    defined(_ARM) /* ImageCraft */
# define INCBIN_ARM
#endif

/* Assembler alignment directives: INCBIN_ALIGN_HOST is emitted in front of the
 * data and size labels, INCBIN_ALIGN_BYTE in front of the end label (see
 * INCBIN_COMMON). */
#ifdef __GNUC__
/* Utilize .balign where supported */
# define INCBIN_ALIGN_HOST ".balign " INCBIN_STRINGIZE(INCBIN_ALIGNMENT) "\n"
# define INCBIN_ALIGN_BYTE ".balign 1\n"
#elif defined(INCBIN_ARM)
/*
 * On arm assemblers, the alignment value is calculated as (1 << n) where `n' is
 * the shift count. This is the value passed to `.align'
 */
# define INCBIN_ALIGN_HOST ".align " INCBIN_STRINGIZE(INCBIN_ALIGNMENT_INDEX) "\n"
# define INCBIN_ALIGN_BYTE ".align 0\n"
#else
/* We assume other inline assembler's treat `.align' as `.balign' */
# define INCBIN_ALIGN_HOST ".align " INCBIN_STRINGIZE(INCBIN_ALIGNMENT) "\n"
# define INCBIN_ALIGN_BYTE ".align 1\n"
#endif

/* INCBIN_CONST is used by incbin.c generated files */
#if defined(__cplusplus)
# define INCBIN_EXTERNAL extern "C"
# define INCBIN_CONST extern const
#else
# define INCBIN_EXTERNAL extern
# define INCBIN_CONST const
#endif

/**
 * @brief Optionally override the linker section into which size and data is
 * emitted.
 *
 * @warning If you use this facility, you might have to deal with
 * platform-specific linker output section naming on your own.
 */
#if !defined(INCBIN_OUTPUT_SECTION)
# if defined(__APPLE__)
# define INCBIN_OUTPUT_SECTION ".const_data"
# else
# define INCBIN_OUTPUT_SECTION ".rodata"
# endif
#endif

/**
 * @brief Optionally override the linker section into which data is emitted.
 *
 * @warning If you use this facility, you might have to deal with
 * platform-specific linker output section naming on your own.
 */
#if !defined(INCBIN_OUTPUT_DATA_SECTION)
# define INCBIN_OUTPUT_DATA_SECTION INCBIN_OUTPUT_SECTION
#endif

/**
 * @brief Optionally override the linker section into which size is emitted.
 *
 * @warning If you use this facility, you might have to deal with
 * platform-specific linker output section naming on your own.
 *
 * @note This is useful for Harvard architectures where program memory cannot
 * be directly read from the program without special instructions. With this you
 * can choose to put the size variable in RAM rather than ROM.
 */
#if !defined(INCBIN_OUTPUT_SIZE_SECTION)
# define INCBIN_OUTPUT_SIZE_SECTION INCBIN_OUTPUT_SECTION
#endif

/* Per-platform spelling of the assembler directives used by INCBIN_COMMON:
 * section switch, symbol export, integer/byte emission, symbol mangling prefix
 * and symbol type annotation. */
#if defined(__APPLE__)
# include "TargetConditionals.h"
# if defined(TARGET_OS_IPHONE) && !defined(INCBIN_SILENCE_BITCODE_WARNING)
# warning "incbin is incompatible with bitcode. Using the library will break upload to App Store if you have bitcode enabled. Add `#define INCBIN_SILENCE_BITCODE_WARNING` before including this header to silence this warning."
# endif
/* The directives are different for Apple branded compilers */
# define INCBIN_SECTION INCBIN_OUTPUT_SECTION "\n"
# define INCBIN_GLOBAL(NAME) ".globl " INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME "\n"
# define INCBIN_INT ".long "
# define INCBIN_MANGLE "_"
# define INCBIN_BYTE ".byte "
# define INCBIN_TYPE(...)
#else
# define INCBIN_SECTION ".section " INCBIN_OUTPUT_SECTION "\n"
/* NOTE(review): unlike the Apple variant, this export (and INCBIN_TYPE below)
 * does not prepend INCBIN_MANGLE, while the label definitions in INCBIN_COMMON
 * do -- confirm this is intended for targets whose __USER_LABEL_PREFIX__ is
 * not empty. */
# define INCBIN_GLOBAL(NAME) ".global " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME "\n"
# if defined(__ghs__)
# define INCBIN_INT ".word "
# else
# define INCBIN_INT ".int "
# endif
# if defined(__USER_LABEL_PREFIX__)
# define INCBIN_MANGLE INCBIN_STRINGIZE(__USER_LABEL_PREFIX__)
# else
# define INCBIN_MANGLE ""
# endif
# if defined(INCBIN_ARM)
/* On arm assemblers, `@' is used as a line comment token */
# define INCBIN_TYPE(NAME) ".type " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME ", %object\n"
# elif defined(__MINGW32__) || defined(__MINGW64__)
/* Mingw doesn't support this directive either */
# define INCBIN_TYPE(NAME)
# else
/* It's safe to use `@' on other architectures */
# define INCBIN_TYPE(NAME) ".type " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME ", @object\n"
# endif
# define INCBIN_BYTE ".byte "
#endif

/* List of style types used for symbol names */
#define INCBIN_STYLE_CAMEL 0
#define INCBIN_STYLE_SNAKE 1

/**
 * @brief Specify the prefix to use for symbol names.
 *
 * @note By default this is "g".
 *
 * @code
 * #define INCBIN_PREFIX incbin
 * #include "incbin.h"
 * INCBIN(Foo, "foo.txt");
 *
 * // Now you have the following symbols instead
 * // (<data>, <end> and <size> stand for the suffixes of the selected style):
 * // const unsigned char incbinFoo<data>[];
 * // const unsigned char *const incbinFoo<end>;
 * // const unsigned int incbinFoo<size>;
 * @endcode
 */
#if !defined(INCBIN_PREFIX)
# define INCBIN_PREFIX g
#endif

/**
 * @brief Specify the style used for symbol names.
*
 * Possible options are
 * - INCBIN_STYLE_CAMEL "CamelCase"
 * - INCBIN_STYLE_SNAKE "snake_case"
 *
 * @note By default this is INCBIN_STYLE_CAMEL
 *
 * @code
 * #define INCBIN_STYLE INCBIN_STYLE_SNAKE
 * #include "incbin.h"
 * INCBIN(foo, "foo.txt");
 *
 * // Now you have the following symbols (<prefix> is INCBIN_PREFIX):
 * // const unsigned char <prefix>foo_data[];
 * // const unsigned char *const <prefix>foo_end;
 * // const unsigned int <prefix>foo_size;
 * @endcode
 */
#if !defined(INCBIN_STYLE)
# define INCBIN_STYLE INCBIN_STYLE_CAMEL
#endif

/* Style lookup tables: INCBIN_STYLE_<style number>_<TYPE> is the suffix that is
 * appended to the symbol name for that style. */
#define INCBIN_STYLE_0_DATA Data
#define INCBIN_STYLE_0_END End
#define INCBIN_STYLE_0_SIZE Size
#define INCBIN_STYLE_1_DATA _data
#define INCBIN_STYLE_1_END _end
#define INCBIN_STYLE_1_SIZE _size

/* Style lookup: returning identifier */
#define INCBIN_STYLE_IDENT(TYPE) \
    INCBIN_CONCATENATE( \
        INCBIN_STYLE_, \
        INCBIN_CONCATENATE( \
            INCBIN_EVAL(INCBIN_STYLE), \
            INCBIN_CONCATENATE(_, TYPE)))

/* Style lookup: returning string literal */
#define INCBIN_STYLE_STRING(TYPE) \
    INCBIN_STRINGIZE( \
        INCBIN_STYLE_IDENT(TYPE)) \

/* Generate the global labels by indirectly invoking the macro with our style
 * type and concatenating the name against them. Expands to the INCBIN_GLOBAL
 * directive followed by the INCBIN_TYPE directive for NAME plus the style
 * suffix selected by TYPE. */
#define INCBIN_GLOBAL_LABELS(NAME, TYPE) \
    INCBIN_INVOKE( \
        INCBIN_GLOBAL, \
        INCBIN_CONCATENATE( \
            NAME, \
            INCBIN_INVOKE( \
                INCBIN_STYLE_IDENT, \
                TYPE))) \
    INCBIN_INVOKE( \
        INCBIN_TYPE, \
        INCBIN_CONCATENATE( \
            NAME, \
            INCBIN_INVOKE( \
                INCBIN_STYLE_IDENT, \
                TYPE)))

/**
 * @brief Externally reference binary data included in another translation unit.
 *
 * Produces three external symbols that reference the binary data included in
 * another translation unit.
 *
 * The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with
 * "Data", as well as "End" and "Size" after. An example is provided below.
 *
 * @param TYPE Optional array type. Omitting this picks a default of `unsigned char`.
 * @param NAME The name given for the binary data
 *
 * @code
 * INCBIN_EXTERN(Foo);
 *
 * // Now you have the following symbols (<prefix> is INCBIN_PREFIX; <data>,
 * // <end> and <size> are the suffixes of the selected INCBIN_STYLE):
 * // extern const unsigned char <prefix>Foo<data>[];
 * // extern const unsigned char *const <prefix>Foo<end>;
 * // extern const unsigned int <prefix>Foo<size>;
 * @endcode
 *
 * You may specify a custom optional data type as well as the first argument.
 * @code
 * INCBIN_EXTERN(custom_type, Foo);
 *
 * // Now you have the following symbols:
 * // extern const custom_type <prefix>Foo<data>[];
 * // extern const custom_type *const <prefix>Foo<end>;
 * // extern const unsigned int <prefix>Foo<size>;
 * @endcode
 */
/* Dispatches on the argument count to INCBIN_EXTERN_1 or INCBIN_EXTERN_2. */
#define INCBIN_EXTERN(...) \
    INCBIN_CONCATENATE(INCBIN_EXTERN_, INCBIN_VA_ARGC(__VA_ARGS__))(__VA_ARGS__)
#define INCBIN_EXTERN_1(NAME, ...) \
    INCBIN_EXTERN_2(unsigned char, NAME)
/* Declares the data array, the end pointer and the size. The last declaration
 * has no trailing semicolon; the user of the macro supplies it. */
#define INCBIN_EXTERN_2(TYPE, NAME) \
    INCBIN_EXTERNAL const INCBIN_ALIGN TYPE \
        INCBIN_CONCATENATE( \
            INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \
            INCBIN_STYLE_IDENT(DATA))[]; \
    INCBIN_EXTERNAL const INCBIN_ALIGN TYPE *const \
        INCBIN_CONCATENATE( \
            INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \
            INCBIN_STYLE_IDENT(END)); \
    INCBIN_EXTERNAL const unsigned int \
        INCBIN_CONCATENATE( \
            INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \
            INCBIN_STYLE_IDENT(SIZE))

/**
 * @brief Externally reference textual data included in another translation unit.
 *
 * Produces three external symbols that reference the textual data included in
 * another translation unit.
 *
 * The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with
 * "Data", as well as "End" and "Size" after. An example is provided below.
 *
 * @param NAME The name given for the textual data
 *
 * @code
 * INCTXT_EXTERN(Foo);
 *
 * // Now you have the following symbols:
 * // extern const char <prefix>Foo<data>[];
 * // extern const char *const <prefix>Foo<end>;
 * // extern const unsigned int <prefix>Foo<size>;
 * @endcode
 */
#define INCTXT_EXTERN(NAME) \
    INCBIN_EXTERN_2(char, NAME)

/**
 * @brief Include a binary file into the current translation unit.
*
 * Includes a binary file into the current translation unit, producing three symbols
 * for objects that encode the data and size respectively.
 *
 * The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with
 * "Data", as well as "End" and "Size" after. An example is provided below.
 *
 * @param TYPE Optional array type. Omitting this picks a default of `unsigned char`.
 * @param NAME The name to associate with this binary data (as an identifier.)
 * @param FILENAME The file to include (as a string literal.)
 *
 * @code
 * INCBIN(Icon, "icon.png");
 *
 * // Now you have the following symbols (<prefix> is INCBIN_PREFIX; <data>,
 * // <end> and <size> are the suffixes of the selected INCBIN_STYLE):
 * // const unsigned char <prefix>Icon<data>[];
 * // const unsigned char *const <prefix>Icon<end>;
 * // const unsigned int <prefix>Icon<size>;
 * @endcode
 *
 * You may specify a custom optional data type as well as the first argument.
 * These macros are specialized by arity.
 * @code
 * INCBIN(custom_type, Icon, "icon.png");
 *
 * // Now you have the following symbols:
 * // const custom_type <prefix>Icon<data>[];
 * // const custom_type *const <prefix>Icon<end>;
 * // const unsigned int <prefix>Icon<size>;
 * @endcode
 *
 * @warning This must be used in global scope
 * @warning The identifiers may be different if INCBIN_STYLE is not default
 *
 * To externally reference the data included by this in another translation unit
 * please @see INCBIN_EXTERN.
 */
#ifdef _MSC_VER
/* With MSVC no assembly is emitted here: INCBIN only declares the symbols and
 * FILENAME is ignored. The definitions are presumably provided by the incbin.c
 * generated file mentioned at INCBIN_CONST -- confirm against the build. */
# define INCBIN(NAME, FILENAME) \
    INCBIN_EXTERN(NAME)
#else
/* Dispatches on the argument count to INCBIN_1, INCBIN_2 or INCBIN_3. */
# define INCBIN(...) \
    INCBIN_CONCATENATE(INCBIN_, INCBIN_VA_ARGC(__VA_ARGS__))(__VA_ARGS__)
/* A single argument is an error. Compilers that define both __GNUC__ and
 * __clang__ take the first branch. */
# if defined(__GNUC__)
# define INCBIN_1(...) _Pragma("GCC error \"Single argument INCBIN not allowed\"")
# elif defined(__clang__)
# define INCBIN_1(...) _Pragma("clang error \"Single argument INCBIN not allowed\"")
# else
# define INCBIN_1(...) /* Cannot do anything here */
# endif
# define INCBIN_2(NAME, FILENAME) \
    INCBIN_3(unsigned char, NAME, FILENAME)
# define INCBIN_3(TYPE, NAME, FILENAME) INCBIN_COMMON(TYPE, NAME, FILENAME, /* No terminator for binary data */)
/* Emits, in this order: the section switch; the exported, aligned data label
 * followed by the file contents and the optional TERMINATOR; the exported end
 * label followed by one byte of value 1; the exported, aligned size label
 * holding (end - data); a switch back to .text. Finally declares the three
 * symbols for C/C++ through INCBIN_EXTERN. */
# define INCBIN_COMMON(TYPE, NAME, FILENAME, TERMINATOR) \
    __asm__(INCBIN_SECTION \
            INCBIN_GLOBAL_LABELS(NAME, DATA) \
            INCBIN_ALIGN_HOST \
            INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(DATA) ":\n" \
            INCBIN_MACRO " \"" FILENAME "\"\n" \
            TERMINATOR \
            INCBIN_GLOBAL_LABELS(NAME, END) \
            INCBIN_ALIGN_BYTE \
            INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(END) ":\n" \
            INCBIN_BYTE "1\n" \
            INCBIN_GLOBAL_LABELS(NAME, SIZE) \
            INCBIN_ALIGN_HOST \
            INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(SIZE) ":\n" \
            INCBIN_INT INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(END) " - " \
                       INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(DATA) "\n" \
            INCBIN_ALIGN_HOST \
            ".text\n" \
    ); \
    INCBIN_EXTERN(TYPE, NAME)
#endif

/**
 * @brief Include a textual file into the current translation unit.
 *
 * This behaves the same as INCBIN except it produces char compatible arrays
 * and implicitly adds a null-terminator byte, thus the size of data included
 * by this is one byte larger than that of INCBIN.
 *
 * Includes a textual file into the current translation unit, producing three
 * symbols for objects that encode the data and size respectively.
 *
 * The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with
 * "Data", as well as "End" and "Size" after. An example is provided below.
 *
 * @param NAME The name to associate with this binary data (as an identifier.)
 * @param FILENAME The file to include (as a string literal.)
 *
 * @code
 * INCTXT(Readme, "readme.txt");
 *
 * // Now you have the following symbols:
 * // const char <prefix>Readme<data>[];
 * // const char *const <prefix>Readme<end>;
 * // const unsigned int <prefix>Readme<size>;
 * @endcode
 *
 * @warning This must be used in global scope
 * @warning The identifiers may be different if INCBIN_STYLE is not default
 *
 * To externally reference the data included by this in another translation unit
 * please @see INCBIN_EXTERN.
 */
#if defined(_MSC_VER)
/* NOTE(review): this declares the symbols through INCBIN_EXTERN(NAME), i.e. as
 * `unsigned char`, not `char` as in the non-MSVC branch -- confirm intended. */
# define INCTXT(NAME, FILENAME) \
    INCBIN_EXTERN(NAME)
#else
# define INCTXT(NAME, FILENAME) \
    INCBIN_COMMON(char, NAME, FILENAME, INCBIN_BYTE "0\n")
#endif

#endif

================================================
FILE: src/main.cpp
================================================
/*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
  Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.

  Stockfish is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

// NOTE(review): the two directives below have lost their header names in this
// copy -- presumably <iostream> and <memory>, given the use of std::cout and
// std::make_unique in main(); restore from upstream.
#include
#include

#include "bitboard.h"
#include "misc.h"
#include "position.h"
#include "tune.h"
#include "uci.h"

using namespace Stockfish;

// Program entry point: prints the engine banner, runs the one-time static
// initialisation of the Bitboards and Position modules, creates the UCI
// front-end from the command line and hands control to its command loop.
// NOTE(review): the template argument of std::make_unique has been lost in
// this copy as well.
int main(int argc, char* argv[]) {
    std::cout << engine_info() << std::endl;

    Bitboards::init();
    Position::init();

    auto uci = std::make_unique(argc, argv);

    // Register the tunable parameters with the engine's option map before
    // any command is processed.
    Tune::init(uci->engine_options());

    uci->loop();

    return 0;
}

================================================
FILE: src/memory.cpp
================================================
/*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
  Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.

  Stockfish is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ #include "memory.h" #include #if __has_include("features.h") #include #endif #if defined(__linux__) && !defined(__ANDROID__) #include #endif #if defined(__APPLE__) || defined(__ANDROID__) || defined(__OpenBSD__) \ || (defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC) && !defined(_WIN32)) \ || defined(__e2k__) #define POSIXALIGNEDALLOC #include #endif #ifdef _WIN32 #if _WIN32_WINNT < 0x0601 #undef _WIN32_WINNT #define _WIN32_WINNT 0x0601 // Force to include needed API prototypes #endif #ifndef NOMINMAX #define NOMINMAX #endif #include // std::hex, std::dec #include // std::cerr #include // std::endl #include // The needed Windows API for processor groups could be missed from old Windows // versions, so instead of calling them directly (forcing the linker to resolve // the calls at compile time), try to load them at runtime. To do this we need // first to define the corresponding function pointers. #endif namespace Stockfish { // Wrappers for systems where the c++17 implementation does not guarantee the // availability of aligned_alloc(). Memory allocated with std_aligned_alloc() // must be freed with std_aligned_free(). void* std_aligned_alloc(size_t alignment, size_t size) { #if defined(_ISOC11_SOURCE) return aligned_alloc(alignment, size); #elif defined(POSIXALIGNEDALLOC) void* mem = nullptr; posix_memalign(&mem, alignment, size); return mem; #elif defined(_WIN32) && !defined(_M_ARM) && !defined(_M_ARM64) return _mm_malloc(size, alignment); #elif defined(_WIN32) return _aligned_malloc(size, alignment); #else return std::aligned_alloc(alignment, size); #endif } void std_aligned_free(void* ptr) { #if defined(POSIXALIGNEDALLOC) free(ptr); #elif defined(_WIN32) && !defined(_M_ARM) && !defined(_M_ARM64) _mm_free(ptr); #elif defined(_WIN32) _aligned_free(ptr); #else free(ptr); #endif } // aligned_large_pages_alloc() will return suitably aligned memory, // if possible using large pages. 
#if defined(_WIN32) static void* aligned_large_pages_alloc_windows([[maybe_unused]] size_t allocSize) { return windows_try_with_large_page_priviliges( [&](size_t largePageSize) { // Round up size to full pages and allocate allocSize = (allocSize + largePageSize - 1) & ~size_t(largePageSize - 1); return VirtualAlloc(nullptr, allocSize, MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES, PAGE_READWRITE); }, []() { return (void*) nullptr; }); } void* aligned_large_pages_alloc(size_t allocSize) { // Try to allocate large pages void* mem = aligned_large_pages_alloc_windows(allocSize); // Fall back to regular, page-aligned, allocation if necessary if (!mem) mem = VirtualAlloc(nullptr, allocSize, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE); return mem; } #else void* aligned_large_pages_alloc(size_t allocSize) { #if defined(__linux__) constexpr size_t alignment = 2 * 1024 * 1024; // 2MB page size assumed #else constexpr size_t alignment = 4096; // small page size assumed #endif // Round up to multiples of alignment size_t size = ((allocSize + alignment - 1) / alignment) * alignment; void* mem = std_aligned_alloc(alignment, size); #if defined(MADV_HUGEPAGE) madvise(mem, size, MADV_HUGEPAGE); #endif return mem; } #endif bool has_large_pages() { #if defined(_WIN32) constexpr size_t page_size = 2 * 1024 * 1024; // 2MB page size assumed void* mem = aligned_large_pages_alloc_windows(page_size); if (mem == nullptr) { return false; } else { aligned_large_pages_free(mem); return true; } #elif defined(__linux__) #if defined(MADV_HUGEPAGE) return true; #else return false; #endif #else return false; #endif } // aligned_large_pages_free() will free the previously memory allocated // by aligned_large_pages_alloc(). The effect is a nop if mem == nullptr. #if defined(_WIN32) void aligned_large_pages_free(void* mem) { if (mem && !VirtualFree(mem, 0, MEM_RELEASE)) { DWORD err = GetLastError(); std::cerr << "Failed to free large page memory. 
Error code: 0x" << std::hex << err << std::dec << std::endl; exit(EXIT_FAILURE); } } #else void aligned_large_pages_free(void* mem) { std_aligned_free(mem); } #endif } // namespace Stockfish ================================================ FILE: src/memory.h ================================================ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. Stockfish is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #ifndef MEMORY_H_INCLUDED #define MEMORY_H_INCLUDED #include #include #include #include #include #include #include #include "types.h" #if defined(_WIN64) #if _WIN32_WINNT < 0x0601 #undef _WIN32_WINNT #define _WIN32_WINNT 0x0601 // Force to include needed API prototypes #endif #if !defined(NOMINMAX) #define NOMINMAX #endif #include // Some Windows headers (RPC/old headers) define short macros such // as 'small' expanding to 'char', which breaks identifiers in the code. // Undefine those macros immediately after including . 
#ifdef small
    #undef small
#endif

// NOTE(review): the header name of this directive has been lost in this copy.
#include

extern "C" {
// Pointer types for the advapi32 functions that are resolved at run time.
// NOTE(review): the Win32 prototypes return BOOL (an int), not bool -- confirm
// that calling through these types is intended.
using OpenProcessToken_t      = bool (*)(HANDLE, DWORD, PHANDLE);
using LookupPrivilegeValueA_t = bool (*)(LPCSTR, LPCSTR, PLUID);
using AdjustTokenPrivileges_t =
  bool (*)(HANDLE, BOOL, PTOKEN_PRIVILEGES, DWORD, PTOKEN_PRIVILEGES, PDWORD);
}
#endif


namespace Stockfish {

// NOTE(review): throughout this header the template parameter lists and
// template arguments have been lost in this copy (e.g. `template void ...`,
// `reinterpret_cast(ptr)`); restore them from upstream before compiling.

void* std_aligned_alloc(size_t alignment, size_t size);
void  std_aligned_free(void* ptr);

// Memory aligned by page size, min alignment: 4096 bytes
void* aligned_large_pages_alloc(size_t size);
void  aligned_large_pages_free(void* mem);

bool has_large_pages();

// Destroys a single object that was created with placement new and releases
// its storage through free_func. Does nothing for a null pointer. Arrays must
// go through memory_deleter_array() instead.
template
void memory_deleter(T* ptr, FREE_FUNC free_func) {
    if (!ptr)
        return;

    // Explicitly needed to call the destructor
    if constexpr (!std::is_trivially_destructible_v)
        ptr->~T();

    free_func(ptr);
}

// Destroys an array that was created by the array overload of
// memory_allocator() and releases its storage through free_func. The element
// count is read from the header stored array_offset bytes in front of ptr.
// Does nothing for a null pointer.
template
void memory_deleter_array(T* ptr, FREE_FUNC free_func) {
    if (!ptr)
        return;


    // Move back on the pointer to where the size is allocated
    const size_t array_offset = std::max(sizeof(size_t), alignof(T));
    char*        raw_memory   = reinterpret_cast(ptr) - array_offset;

    if constexpr (!std::is_trivially_destructible_v)
    {
        const size_t size = *reinterpret_cast(raw_memory);

        // Explicitly call the destructor for each element in reverse order
        for (size_t i = size; i-- > 0;)
            ptr[i].~T();
    }

    // The block handed to free_func starts at the size header, not at ptr.
    free_func(raw_memory);
}

// Allocates memory for a single object and places it there with placement new
// NOTE(review): the result of alloc_func is not checked for nullptr before the
// placement new -- confirm that the allocators used cannot fail here.
template
inline std::enable_if_t, T*> memory_allocator(ALLOC_FUNC alloc_func, Args&&... args) {
    void* raw_memory = alloc_func(sizeof(T));
    ASSERT_ALIGNED(raw_memory, alignof(T));
    return new (raw_memory) T(std::forward(args)...);
}

// Allocates memory for an array of unknown bound and places it there with placement new
// Layout: a size_t element count at the start of the block, then the elements
// beginning array_offset bytes later; array_offset is the larger of
// sizeof(size_t) and the element alignment. Elements are value-initialized.
template
inline std::enable_if_t, std::remove_extent_t*>
memory_allocator(ALLOC_FUNC alloc_func, size_t num) {
    using ElementType = std::remove_extent_t;

    const size_t array_offset = std::max(sizeof(size_t), alignof(ElementType));

    // Save the array size in the memory location
    char* raw_memory =
      reinterpret_cast(alloc_func(array_offset + num * sizeof(ElementType)));
    ASSERT_ALIGNED(raw_memory, alignof(T));

    new (raw_memory) size_t(num);

    for (size_t i = 0; i < num; ++i)
        new (raw_memory + array_offset + i * sizeof(ElementType)) ElementType();

    // Need to return the pointer at the start of the array so that
    // the indexing in unique_ptr works.
    return reinterpret_cast(raw_memory + array_offset);
}

//
//
// aligned large page unique ptr
//
//

// Deleter for single objects allocated with aligned_large_pages_alloc().
template
struct LargePageDeleter {
    void operator()(T* ptr) const { return memory_deleter(ptr, aligned_large_pages_free); }
};

// Deleter for arrays allocated with aligned_large_pages_alloc().
template
struct LargePageArrayDeleter {
    void operator()(T* ptr) const { return memory_deleter_array(ptr, aligned_large_pages_free); }
};

// unique_ptr alias that selects one of the two deleters above.
template
using LargePagePtr =
  std::conditional_t,
                     std::unique_ptr>>,
                     std::unique_ptr>>;

// make_unique_large_page for single objects
template
std::enable_if_t, LargePagePtr> make_unique_large_page(Args&&... args) {
    static_assert(alignof(T) <= 4096,
                  "aligned_large_pages_alloc() may fail for such a big alignment requirement of T");

    T* obj = memory_allocator(aligned_large_pages_alloc, std::forward(args)...);

    return LargePagePtr(obj);
}

// make_unique_large_page for arrays of unknown bound
template
std::enable_if_t, LargePagePtr> make_unique_large_page(size_t num) {
    using ElementType = std::remove_extent_t;

    static_assert(alignof(ElementType) <= 4096,
                  "aligned_large_pages_alloc() may fail for such a big alignment requirement of T");

    ElementType* memory = memory_allocator(aligned_large_pages_alloc, num);

    return LargePagePtr(memory);
}

//
//
// aligned unique ptr
//
//

// Deleter for single objects allocated with std_aligned_alloc().
template
struct AlignedDeleter {
    void operator()(T* ptr) const { return memory_deleter(ptr, std_aligned_free); }
};

// Deleter for arrays allocated with std_aligned_alloc().
template
struct AlignedArrayDeleter {
    void operator()(T* ptr) const { return memory_deleter_array(ptr, std_aligned_free); }
};

// unique_ptr alias that selects one of the two deleters above.
template
using AlignedPtr =
  std::conditional_t,
                     std::unique_ptr>>,
                     std::unique_ptr>>;

// make_unique_aligned for single objects
template
std::enable_if_t, AlignedPtr> make_unique_aligned(Args&&... args) {
    // Allocate with the alignment required by T itself.
    const auto func = [](size_t size) { return std_aligned_alloc(alignof(T), size); };
    T*         obj  = memory_allocator(func, std::forward(args)...);

    return AlignedPtr(obj);
}

// make_unique_aligned for arrays of unknown bound
template
std::enable_if_t, AlignedPtr> make_unique_aligned(size_t num) {
    using ElementType = std::remove_extent_t;

    // Allocate with the alignment required by the element type.
    const auto   func   = [](size_t size) { return std_aligned_alloc(alignof(ElementType), size); };
    ElementType* memory = memory_allocator(func, num);

    return AlignedPtr(memory);
}


// Get the first aligned element of an array.
// ptr must point to an array of size at least `sizeof(T) * N + alignment` bytes,
// where N is the number of elements in the array.
template T* align_ptr_up(T* ptr) { static_assert(alignof(T) < Alignment); const uintptr_t ptrint = reinterpret_cast(reinterpret_cast(ptr)); return reinterpret_cast( reinterpret_cast((ptrint + (Alignment - 1)) / Alignment * Alignment)); } #if defined(_WIN32) template auto windows_try_with_large_page_priviliges([[maybe_unused]] FuncYesT&& fyes, FuncNoT&& fno) { #if !defined(_WIN64) return fno(); #else HANDLE hProcessToken{}; LUID luid{}; const size_t largePageSize = GetLargePageMinimum(); if (!largePageSize) return fno(); // Dynamically link OpenProcessToken, LookupPrivilegeValue and AdjustTokenPrivileges HMODULE hAdvapi32 = GetModuleHandle(TEXT("advapi32.dll")); if (!hAdvapi32) hAdvapi32 = LoadLibrary(TEXT("advapi32.dll")); auto OpenProcessToken_f = OpenProcessToken_t((void (*)()) GetProcAddress(hAdvapi32, "OpenProcessToken")); if (!OpenProcessToken_f) return fno(); auto LookupPrivilegeValueA_f = LookupPrivilegeValueA_t((void (*)()) GetProcAddress(hAdvapi32, "LookupPrivilegeValueA")); if (!LookupPrivilegeValueA_f) return fno(); auto AdjustTokenPrivileges_f = AdjustTokenPrivileges_t((void (*)()) GetProcAddress(hAdvapi32, "AdjustTokenPrivileges")); if (!AdjustTokenPrivileges_f) return fno(); // We need SeLockMemoryPrivilege, so try to enable it for the process if (!OpenProcessToken_f( // OpenProcessToken() GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &hProcessToken)) return fno(); if (!LookupPrivilegeValueA_f(nullptr, "SeLockMemoryPrivilege", &luid)) return fno(); TOKEN_PRIVILEGES tp{}; TOKEN_PRIVILEGES prevTp{}; DWORD prevTpLen = 0; tp.PrivilegeCount = 1; tp.Privileges[0].Luid = luid; tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED; // Try to enable SeLockMemoryPrivilege. Note that even if AdjustTokenPrivileges() // succeeds, we still need to query GetLastError() to ensure that the privileges // were actually obtained. 
if (!AdjustTokenPrivileges_f(hProcessToken, FALSE, &tp, sizeof(TOKEN_PRIVILEGES), &prevTp, &prevTpLen) || GetLastError() != ERROR_SUCCESS) return fno(); auto&& ret = fyes(largePageSize); // Privilege no longer needed, restore previous state AdjustTokenPrivileges_f(hProcessToken, FALSE, &prevTp, 0, nullptr, nullptr); CloseHandle(hProcessToken); return std::forward(ret); #endif } #endif template T load_as(const ByteT* buffer) { static_assert(std::is_trivially_copyable::value, "Type must be trivially copyable"); static_assert(sizeof(ByteT) == 1); T value; std::memcpy(&value, buffer, sizeof(T)); return value; } } // namespace Stockfish #endif // #ifndef MEMORY_H_INCLUDED ================================================ FILE: src/misc.cpp ================================================ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. Stockfish is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #include "misc.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "types.h" namespace Stockfish { namespace { // Version number or dev. constexpr std::string_view version = "dev"; // Our fancy logging facility. The trick here is to replace cin.rdbuf() and // cout.rdbuf() with two Tie objects that tie cin and cout to a file stream. 
// We can toggle the logging of std::cout and std::cin at runtime whilst preserving
// usual I/O functionality, all without changing a single line of code!
// Idea from http://groups.google.com/group/comp.lang.c++/msg/1d941c0f26ea0d81

// Stream buffer that forwards every operation to the wrapped buffer `buf` and
// copies each character that passes through into `logBuf`, prefixing every new
// log line with "<< " (output) or ">> " (input).
struct Tie: public std::streambuf {  // MSVC requires split streambuf for cin and cout

    Tie(std::streambuf* b, std::streambuf* l) :
        buf(b),
        logBuf(l) {}

    // Flush the log first, then the wrapped buffer; returns the latter's result.
    int sync() override { return logBuf->pubsync(), buf->pubsync(); }
    // Output path: write the character to the wrapped buffer and log it.
    int overflow(int c) override { return log(buf->sputc(char(c)), "<< "); }
    // Peeking does not consume a character, so nothing is logged here.
    int underflow() override { return buf->sgetc(); }
    // Input path: consume one character from the wrapped buffer and log it.
    int uflow() override { return log(buf->sbumpc(), ">> "); }

    std::streambuf *buf, *logBuf;

    // Appends c to the log, preceded by the 3-character prefix when the
    // previously logged character was a newline. Returns the result of the
    // log write.
    int log(int c, const char* prefix) {

        // Shared by all Tie instances, so input and output interleave
        // correctly in the one log file.
        static int last = '\n';  // Single log file

        if (last == '\n')
            logBuf->sputn(prefix, 3);

        return last = logBuf->sputc(char(c));
    }
};

// Owns the log file and the two Tie buffers. The only instance is the
// function-local static inside start().
class Logger {

    Logger() :
        in(std::cin.rdbuf(), file.rdbuf()),
        out(std::cout.rdbuf(), file.rdbuf()) {}
    // Stops logging and restores the original stream buffers.
    ~Logger() { start(""); }

    std::ofstream file;
    Tie           in, out;

   public:
    // Stops any logging in progress, then, if fname is not empty, opens it and
    // starts logging std::cin and std::cout to it. Exits the process when the
    // file cannot be opened.
    static void start(const std::string& fname) {

        static Logger l;

        if (l.file.is_open())
        {
            // Put the original buffers back before closing the file.
            std::cout.rdbuf(l.out.buf);
            std::cin.rdbuf(l.in.buf);
            l.file.close();
        }

        if (!fname.empty())
        {
            l.file.open(fname, std::ifstream::out);

            if (!l.file.is_open())
            {
                std::cerr << "Unable to open debug log file " << fname << std::endl;
                exit(EXIT_FAILURE);
            }

            std::cin.rdbuf(&l.in);
            std::cout.rdbuf(&l.out);
        }
    }
};

}  // namespace


// Returns the full name of the current Stockfish version.
//
// For local dev compiles we try to append the commit SHA and
// commit date from git.
// If that fails only the local compilation
// date is set and "nogit" is specified:
//      Stockfish dev-YYYYMMDD-SHA
//      or
//      Stockfish dev-YYYYMMDD-nogit
//
// For releases (non-dev builds) we only include the version number:
//      Stockfish version
std::string engine_version_info() {
    std::stringstream ss;
    ss << "Stockfish " << version << std::setfill('0');

    if constexpr (version == "dev")
    {
        ss << "-";
#ifdef GIT_DATE
        ss << stringify(GIT_DATE);
#else
        constexpr std::string_view months("Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec");

        std::string       month, day, year;
        std::stringstream date(__DATE__);  // From compiler, format is "Sep 21 2008"

        date >> month >> day >> year;
        // Each month name occupies four characters in `months`, so the
        // position divided by four is the zero-based month index.
        ss << year << std::setw(2) << std::setfill('0') << (1 + months.find(month) / 4)
           << std::setw(2) << std::setfill('0') << day;
#endif

        ss << "-";

#ifdef GIT_SHA
        ss << stringify(GIT_SHA);
#else
        ss << "nogit";
#endif
    }

    return ss.str();
}

// Returns the version string followed by the author line, formatted either
// for the UCI "id author" reply (to_uci == true) or for the start-up banner.
std::string engine_info(bool to_uci) {
    return engine_version_info() + (to_uci ? "\nid author " : " by ")
         + "the Stockfish developers (see AUTHORS file)";
}


// Returns a string trying to describe the compiler we use
std::string compiler_info() {

#define make_version_string(major, minor, patch) \
    stringify(major) "." stringify(minor) "." stringify(patch)

    // Predefined macros hell:
    //
    // __GNUC__                Compiler is GCC, Clang or ICX
    // __clang__               Compiler is Clang or ICX
    // __INTEL_LLVM_COMPILER   Compiler is ICX
    // _MSC_VER                Compiler is MSVC
    // _WIN32                  Building on Windows (any)
    // _WIN64                  Building on Windows 64 bit

    std::string compiler = "\nCompiled by : ";

    // The most specific compiler macro is tested first because several
    // compilers also define the macros of the ones they emulate.
#if defined(__INTEL_LLVM_COMPILER)
    compiler += "ICX ";
    compiler += stringify(__INTEL_LLVM_COMPILER);
#elif defined(__clang__)
    compiler += "clang++ ";
    compiler += make_version_string(__clang_major__, __clang_minor__, __clang_patchlevel__);
#elif _MSC_VER
    compiler += "MSVC ";
    compiler += "(version ";
    compiler += stringify(_MSC_FULL_VER) "." stringify(_MSC_BUILD);
    compiler += ")";
#elif defined(__e2k__) && defined(__LCC__)
    // Appends '.' followed by n as exactly two decimal digits.
    #define dot_ver2(n) \
        compiler += char('.'); \
        compiler += char('0' + (n) / 10); \
        compiler += char('0' + (n) % 10);

    compiler += "MCST LCC ";
    compiler += "(version ";
    compiler += std::to_string(__LCC__ / 100);
    dot_ver2(__LCC__ % 100) dot_ver2(__LCC_MINOR__) compiler += ")";
#elif __GNUC__
    compiler += "g++ (GNUC) ";
    compiler += make_version_string(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__);
#else
    compiler += "Unknown compiler ";
    compiler += "(unknown version)";
#endif

    // Host platform.
#if defined(__APPLE__)
    compiler += " on Apple";
#elif defined(__CYGWIN__)
    compiler += " on Cygwin";
#elif defined(__MINGW64__)
    compiler += " on MinGW64";
#elif defined(__MINGW32__)
    compiler += " on MinGW32";
#elif defined(__ANDROID__)
    compiler += " on Android";
#elif defined(__linux__)
    compiler += " on Linux";
#elif defined(_WIN64)
    compiler += " on Microsoft Windows 64-bit";
#elif defined(_WIN32)
    compiler += " on Microsoft Windows 32-bit";
#else
    compiler += " on unknown system";
#endif

    compiler += "\nCompilation architecture : ";
#if defined(ARCH)
    compiler += stringify(ARCH);
#else
    compiler += "(undefined architecture)";
#endif

    // Instruction-set and build flags this binary was compiled with.
    compiler += "\nCompilation settings : ";
    compiler += (Is64Bit ? "64bit" : "32bit");
#if defined(USE_AVX512ICL)
    compiler += " AVX512ICL";
#endif
#if defined(USE_VNNI)
    compiler += " VNNI";
#endif
#if defined(USE_AVX512)
    compiler += " AVX512";
#endif
    compiler += (HasPext ? " BMI2" : "");
#if defined(USE_AVX2)
    compiler += " AVX2";
#endif
#if defined(USE_SSE41)
    compiler += " SSE41";
#endif
#if defined(USE_SSSE3)
    compiler += " SSSE3";
#endif
#if defined(USE_SSE2)
    compiler += " SSE2";
#endif
#if defined(USE_NEON_DOTPROD)
    compiler += " NEON_DOTPROD";
#elif defined(USE_NEON)
    compiler += " NEON";
#endif
    compiler += (HasPopCnt ? " POPCNT" : "");

#if !defined(NDEBUG)
    compiler += " DEBUG";
#endif

    compiler += "\nCompiler __VERSION__ macro : ";
#ifdef __VERSION__
    compiler += __VERSION__;
#else
    compiler += "(undefined macro)";
#endif

    compiler += "\n";

    return compiler;
}


// Debug functions used mainly to collect run-time statistics
constexpr int MaxDebugSlots = 32;

namespace {

// NOTE(review): the template parameter list and several template arguments in
// this namespace have been lost in this copy; restore them from upstream.

// Fixed-size group of N atomic counters belonging to one debug slot.
template
struct DebugInfo {
    std::array, N> data = {0};

    [[nodiscard]] constexpr std::atomic& operator[](size_t index) {
        assert(index < N);
        return data[index];
    }

    // Element-wise copy; needed because atomics are not copy-assignable.
    constexpr DebugInfo& operator=(const DebugInfo& other) {
        for (size_t i = 0; i < N; i++)
            data[i].store(other.data[i].load());
        return *this;
    }
};

// Counters for dbg_extremes_of(): [0] sample count, [1] running maximum,
// [2] running minimum. The extremes start at the opposite numeric limits so
// that the first sample replaces both.
struct DebugExtremes: public DebugInfo<3> {
    DebugExtremes() {
        data[1] = std::numeric_limits::min();
        data[2] = std::numeric_limits::max();
    }
};

// One entry per debug slot for each kind of statistic.
std::array, MaxDebugSlots>  hit;
std::array, MaxDebugSlots>  mean;
std::array, MaxDebugSlots>  stdev;
std::array, MaxDebugSlots>  correl;
std::array extremes;

}  // namespace

// Counts one sample in `slot` and, when cond is true, one hit.
// All dbg_*() functions access their slot with at(), which throws for an
// out-of-range slot.
void dbg_hit_on(bool cond, int slot) {

    ++hit.at(slot)[0];
    if (cond)
        ++hit.at(slot)[1];
}

// Accumulates the sample count and the sum of values for `slot`.
void dbg_mean_of(int64_t value, int slot) {

    ++mean.at(slot)[0];
    mean.at(slot)[1] += value;
}

// Accumulates the sample count, the sum and the sum of squares for `slot`.
void dbg_stdev_of(int64_t value, int slot) {

    ++stdev.at(slot)[0];
    stdev.at(slot)[1] += value;
    stdev.at(slot)[2] += value * value;
}

// Counts one sample and updates the running maximum and minimum of `slot`.
void dbg_extremes_of(int64_t value, int slot) {
    ++extremes.at(slot)[0];

    // Compare-and-swap loops: retry while the stored extreme is still worse
    // than value and another thread changed it in between.
    int64_t current_max = extremes.at(slot)[1].load();
    while (current_max < value && !extremes.at(slot)[1].compare_exchange_weak(current_max, value))
    {}

    int64_t current_min = extremes.at(slot)[2].load();
    while (current_min > value && !extremes.at(slot)[2].compare_exchange_weak(current_min, value))
    {}
}

// Accumulates the sums needed for the correlation coefficient of the two
// values: count, sum and sum of squares of each, and the sum of products.
void dbg_correl_of(int64_t value1, int64_t value2, int slot) {

    ++correl.at(slot)[0];
    correl.at(slot)[1] += value1;
    correl.at(slot)[2] += value1 * value1;
    correl.at(slot)[3] += value2;
    correl.at(slot)[4] += value2 * value2;
    correl.at(slot)[5] += value1 * value2;
}

// Prints to std::cerr the statistics of every slot that received at least
// one sample.
void dbg_print() {

    int64_t n;
    // E(x) is x divided by the sample count n of the slot being printed;
    // n is captured by reference and assigned in each `if` below.
    auto E   = [&n](int64_t x) { return double(x) / n; };
    auto sqr = [](double x) { return x * x; };

    for (int i = 0; i < MaxDebugSlots; ++i)
        if ((n = hit[i][0]))
            std::cerr << "Hit #" << i << ": Total " << n << " Hits " << hit[i][1]
                      << " Hit Rate (%) " << 100.0 * E(hit[i][1]) << std::endl;

    for (int i = 0; i < MaxDebugSlots; ++i)
        if ((n = mean[i][0]))
        {
            std::cerr << "Mean #" << i << ": Total " << n << " Mean " << E(mean[i][1]) << std::endl;
        }

    for (int i = 0; i < MaxDebugSlots; ++i)
        if ((n = stdev[i][0]))
        {
            // sqrt(E[x^2] - E[x]^2)
            double r = sqrt(E(stdev[i][2]) - sqr(E(stdev[i][1])));
            std::cerr << "Stdev #" << i << ": Total " << n << " Stdev " << r << std::endl;
        }

    for (int i = 0; i < MaxDebugSlots; ++i)
        if ((n = extremes[i][0]))
        {
            std::cerr << "Extremity #" << i << ": Total " << n << " Min " << extremes[i][2]
                      << " Max " << extremes[i][1] << std::endl;
        }

    for (int i = 0; i < MaxDebugSlots; ++i)
        if ((n = correl[i][0]))
        {
            // (E[xy] - E[x]E[y]) / (stdev(x) * stdev(y))
            double r = (E(correl[i][5]) - E(correl[i][1]) * E(correl[i][3]))
                     / (sqrt(E(correl[i][2]) - sqr(E(correl[i][1])))
                        * sqrt(E(correl[i][4]) - sqr(E(correl[i][3]))));
            std::cerr << "Correl. #" << i << ": Total " << n << " Coefficient " << r << std::endl;
        }
}

// Resets every slot of every statistic to its freshly constructed state.
void dbg_clear() {
    hit.fill({});
    mean.fill({});
    stdev.fill({});
    correl.fill({});
    extremes.fill({});
}

// Used to serialize access to std::cout
// to avoid multiple threads writing at the same time.
// Streaming IO_LOCK acquires the mutex and streaming IO_UNLOCK releases it;
// any other value leaves it untouched.
std::ostream& operator<<(std::ostream& os, SyncCout sc) {

    static std::mutex m;

    if (sc == IO_LOCK)
        m.lock();

    if (sc == IO_UNLOCK)
        m.unlock();

    return os;
}

void sync_cout_start() { std::cout << IO_LOCK; }
void sync_cout_end() { std::cout << IO_UNLOCK; }

// Hash function based on public domain MurmurHash64A, by Austin Appleby.
uint64_t hash_bytes(const char* data, size_t size) { const uint64_t m = 0xc6a4a7935bd1e995ull; const int r = 47; uint64_t h = size * m; const char* end = data + (size & ~(size_t) 7); for (const char* p = data; p != end; p += 8) { uint64_t k; std::memcpy(&k, p, sizeof(k)); k *= m; k ^= k >> r; k *= m; h ^= k; h *= m; } if (size & 7) { uint64_t k = 0; for (int i = (size & 7) - 1; i >= 0; i--) k = (k << 8) | (uint64_t) end[i]; h ^= k; h *= m; } h ^= h >> r; h *= m; h ^= h >> r; return h; } // Trampoline helper to avoid moving Logger to misc.h void start_logger(const std::string& fname) { Logger::start(fname); } #ifdef _WIN32 #include #define GETCWD _getcwd #else #include #define GETCWD getcwd #endif size_t str_to_size_t(const std::string& s) { unsigned long long value = std::stoull(s); if (value > std::numeric_limits::max()) std::exit(EXIT_FAILURE); return static_cast(value); } std::optional read_file_to_string(const std::string& path) { std::ifstream f(path, std::ios_base::binary); if (!f) return std::nullopt; return std::string(std::istreambuf_iterator(f), std::istreambuf_iterator()); } void remove_whitespace(std::string& s) { s.erase(std::remove_if(s.begin(), s.end(), [](char c) { return std::isspace(c); }), s.end()); } bool is_whitespace(std::string_view s) { return std::all_of(s.begin(), s.end(), [](char c) { return std::isspace(c); }); } std::string CommandLine::get_binary_directory(std::string argv0) { std::string pathSeparator; #ifdef _WIN32 pathSeparator = "\\"; #ifdef _MSC_VER // Under windows argv[0] may not have the extension. Also _get_pgmptr() had // issues in some Windows 10 versions, so check returned values carefully. 
char* pgmptr = nullptr; if (!_get_pgmptr(&pgmptr) && pgmptr != nullptr && *pgmptr) argv0 = pgmptr; #endif #else pathSeparator = "/"; #endif // Extract the working directory auto workingDirectory = CommandLine::get_working_directory(); // Extract the binary directory path from argv0 auto binaryDirectory = argv0; size_t pos = binaryDirectory.find_last_of("\\/"); if (pos == std::string::npos) binaryDirectory = "." + pathSeparator; else binaryDirectory.resize(pos + 1); // Pattern replacement: "./" at the start of path is replaced by the working directory if (binaryDirectory.find("." + pathSeparator) == 0) binaryDirectory.replace(0, 1, workingDirectory); return binaryDirectory; } std::string CommandLine::get_working_directory() { std::string workingDirectory = ""; char buff[40000]; char* cwd = GETCWD(buff, 40000); if (cwd) workingDirectory = cwd; return workingDirectory; } } // namespace Stockfish ================================================ FILE: src/misc.h ================================================ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. Stockfish is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . 
*/ #ifndef MISC_H_INCLUDED #define MISC_H_INCLUDED #include #include #include #include #include #include #include // IWYU pragma: keep // IWYU pragma: no_include <__exception/terminate.h> #include #include #include #include #include #include #include #include #include #if !defined(NO_PREFETCH) && (defined(_MSC_VER) || defined(__INTEL_COMPILER)) #include #endif #define stringify2(x) #x #define stringify(x) stringify2(x) namespace Stockfish { std::string engine_version_info(); std::string engine_info(bool to_uci = false); std::string compiler_info(); // Prefetch hint enums for explicit call-site control. enum class PrefetchRw { READ, WRITE }; // NOTE: PrefetchLoc controls locality / cache level, not whether a prefetch // is issued. In particular, PrefetchLoc::NONE maps to a non-temporal / // lowest-locality prefetch (Intel: _MM_HINT_NTA, GCC/Clang: locality = 0) // and therefore still performs a prefetch. To completely disable // prefetching, define NO_PREFETCH so that prefetch() becomes a no-op. enum class PrefetchLoc { NONE, // Non-temporal / no cache locality (still issues a prefetch) LOW, // Low locality (e.g. T2 / L2) MODERATE, // Moderate locality (e.g. T1 / L1) HIGH // High locality (e.g. T0 / closest cache) }; // Preloads the given address into cache. This is a non-blocking // function that doesn't stall the CPU waiting for data to be loaded from memory, // which can be quite slow. 
#ifdef NO_PREFETCH template void prefetch(const void*) {} #elif defined(_MSC_VER) || defined(__INTEL_COMPILER) constexpr int get_intel_hint(PrefetchRw rw, PrefetchLoc loc) { if (rw == PrefetchRw::WRITE) { #ifdef _MM_HINT_ET0 return _MM_HINT_ET0; #else // Fallback when write-prefetch hint is not available: use T0 return _MM_HINT_T0; #endif } switch (loc) { case PrefetchLoc::NONE : return _MM_HINT_NTA; case PrefetchLoc::LOW : return _MM_HINT_T2; case PrefetchLoc::MODERATE : return _MM_HINT_T1; case PrefetchLoc::HIGH : return _MM_HINT_T0; default : return _MM_HINT_T0; } } template void prefetch(const void* addr) { _mm_prefetch(static_cast(addr), get_intel_hint(RW, LOC)); } #else template void prefetch(const void* addr) { __builtin_prefetch(addr, static_cast(RW), static_cast(LOC)); } #endif void start_logger(const std::string& fname); size_t str_to_size_t(const std::string& s); #if defined(__linux__) struct PipeDeleter { void operator()(FILE* file) const { if (file != nullptr) { pclose(file); } } }; #endif // Reads the file as bytes. // Returns std::nullopt if the file does not exist. 
std::optional read_file_to_string(const std::string& path); void dbg_hit_on(bool cond, int slot = 0); void dbg_mean_of(int64_t value, int slot = 0); void dbg_stdev_of(int64_t value, int slot = 0); void dbg_extremes_of(int64_t value, int slot = 0); void dbg_correl_of(int64_t value1, int64_t value2, int slot = 0); void dbg_print(); void dbg_clear(); using TimePoint = std::chrono::milliseconds::rep; // A value in milliseconds static_assert(sizeof(TimePoint) == sizeof(int64_t), "TimePoint should be 64 bits"); inline TimePoint now() { return std::chrono::duration_cast( std::chrono::steady_clock::now().time_since_epoch()) .count(); } inline std::vector split(std::string_view s, std::string_view delimiter) { std::vector res; if (s.empty()) return res; size_t begin = 0; for (;;) { const size_t end = s.find(delimiter, begin); if (end == std::string::npos) break; res.emplace_back(s.substr(begin, end - begin)); begin = end + delimiter.size(); } res.emplace_back(s.substr(begin)); return res; } void remove_whitespace(std::string& s); bool is_whitespace(std::string_view s); enum SyncCout { IO_LOCK, IO_UNLOCK }; std::ostream& operator<<(std::ostream&, SyncCout); #define sync_cout std::cout << IO_LOCK #define sync_endl std::endl << IO_UNLOCK void sync_cout_start(); void sync_cout_end(); // True if and only if the binary is compiled on a little-endian machine static inline const std::uint16_t Le = 1; static inline const bool IsLittleEndian = *reinterpret_cast(&Le) == 1; template class ValueList { public: std::size_t size() const { return size_; } int ssize() const { return int(size_); } void push_back(const T& value) { assert(size_ < MaxSize); values_[size_++] = value; } const T* begin() const { return values_; } const T* end() const { return values_ + size_; } const T& operator[](int index) const { return values_[index]; } T* make_space(size_t count) { T* result = &values_[size_]; size_ += count; assert(size_ <= MaxSize); return result; } private: T values_[MaxSize]; std::size_t 
size_ = 0; }; template class MultiArray; namespace Detail { template struct MultiArrayHelper { using ChildType = MultiArray; }; template struct MultiArrayHelper { using ChildType = T; }; template constexpr bool is_strictly_assignable_v = std::is_assignable_v && (std::is_same_v || !std::is_convertible_v); } // MultiArray is a generic N-dimensional array. // The template parameters (Size and Sizes) encode the dimensions of the array. template class MultiArray { using ChildType = typename Detail::MultiArrayHelper::ChildType; using ArrayType = std::array; ArrayType data_; public: using value_type = typename ArrayType::value_type; using size_type = typename ArrayType::size_type; using difference_type = typename ArrayType::difference_type; using reference = typename ArrayType::reference; using const_reference = typename ArrayType::const_reference; using pointer = typename ArrayType::pointer; using const_pointer = typename ArrayType::const_pointer; using iterator = typename ArrayType::iterator; using const_iterator = typename ArrayType::const_iterator; using reverse_iterator = typename ArrayType::reverse_iterator; using const_reverse_iterator = typename ArrayType::const_reverse_iterator; constexpr auto& at(size_type index) noexcept { return data_.at(index); } constexpr const auto& at(size_type index) const noexcept { return data_.at(index); } constexpr auto& operator[](size_type index) noexcept { return data_[index]; } constexpr const auto& operator[](size_type index) const noexcept { return data_[index]; } constexpr auto& front() noexcept { return data_.front(); } constexpr const auto& front() const noexcept { return data_.front(); } constexpr auto& back() noexcept { return data_.back(); } constexpr const auto& back() const noexcept { return data_.back(); } auto* data() { return data_.data(); } const auto* data() const { return data_.data(); } constexpr auto begin() noexcept { return data_.begin(); } constexpr auto end() noexcept { return data_.end(); } constexpr auto 
begin() const noexcept { return data_.begin(); } constexpr auto end() const noexcept { return data_.end(); } constexpr auto cbegin() const noexcept { return data_.cbegin(); } constexpr auto cend() const noexcept { return data_.cend(); } constexpr auto rbegin() noexcept { return data_.rbegin(); } constexpr auto rend() noexcept { return data_.rend(); } constexpr auto rbegin() const noexcept { return data_.rbegin(); } constexpr auto rend() const noexcept { return data_.rend(); } constexpr auto crbegin() const noexcept { return data_.crbegin(); } constexpr auto crend() const noexcept { return data_.crend(); } constexpr bool empty() const noexcept { return data_.empty(); } constexpr size_type size() const noexcept { return data_.size(); } constexpr size_type max_size() const noexcept { return data_.max_size(); } template void fill(const U& v) { static_assert(Detail::is_strictly_assignable_v, "Cannot assign fill value to entry type"); for (auto& ele : data_) { if constexpr (sizeof...(Sizes) == 0) ele = v; else ele.fill(v); } } constexpr void swap(MultiArray& other) noexcept { data_.swap(other.data_); } }; // xorshift64star Pseudo-Random Number Generator // This class is based on original code written and dedicated // to the public domain by Sebastiano Vigna (2014). // It has the following characteristics: // // - Outputs 64-bit numbers // - Passes Dieharder and SmallCrush test batteries // - Does not require warm-up, no zeroland to escape // - Internal state is a single 64-bit integer // - Period is 2^64 - 1 // - Speed: 1.60 ns/call (Core i7 @3.40GHz) // // For further analysis see // class PRNG { uint64_t s; uint64_t rand64() { s ^= s >> 12, s ^= s << 25, s ^= s >> 27; return s * 2685821657736338717LL; } public: PRNG(uint64_t seed) : s(seed) { assert(seed); } template T rand() { return T(rand64()); } // Special generator used to fast init magic numbers. // Output values only have 1/8th of their bits set on average. 
template T sparse_rand() { return T(rand64() & rand64() & rand64()); } }; inline uint64_t mul_hi64(uint64_t a, uint64_t b) { #if defined(__GNUC__) && defined(IS_64BIT) __extension__ using uint128 = unsigned __int128; return (uint128(a) * uint128(b)) >> 64; #else uint64_t aL = uint32_t(a), aH = a >> 32; uint64_t bL = uint32_t(b), bH = b >> 32; uint64_t c1 = (aL * bL) >> 32; uint64_t c2 = aH * bL + c1; uint64_t c3 = aL * bH + uint32_t(c2); return aH * bH + (c2 >> 32) + (c3 >> 32); #endif } uint64_t hash_bytes(const char*, size_t); template inline std::size_t get_raw_data_hash(const T& value) { // We must have no padding bytes because we're reinterpreting as char static_assert(std::has_unique_object_representations()); return static_cast( hash_bytes(reinterpret_cast(&value), sizeof(value))); } template inline void hash_combine(std::size_t& seed, const T& v) { std::size_t x; // For primitive types we avoid using the default hasher, which may be // nondeterministic across program invocations if constexpr (std::is_integral()) x = v; else x = std::hash{}(v); seed ^= x + 0x9e3779b9 + (seed << 6) + (seed >> 2); } inline std::uint64_t hash_string(const std::string& sv) { return hash_bytes(sv.data(), sv.size()); } template class FixedString { public: FixedString() : length_(0) { data_[0] = '\0'; } FixedString(const char* str) { size_t len = std::strlen(str); if (len > Capacity) std::terminate(); std::memcpy(data_, str, len); length_ = len; data_[length_] = '\0'; } FixedString(const std::string& str) { if (str.size() > Capacity) std::terminate(); std::memcpy(data_, str.data(), str.size()); length_ = str.size(); data_[length_] = '\0'; } std::size_t size() const { return length_; } std::size_t capacity() const { return Capacity; } const char* c_str() const { return data_; } const char* data() const { return data_; } char& operator[](std::size_t i) { return data_[i]; } const char& operator[](std::size_t i) const { return data_[i]; } FixedString& operator+=(const char* str) { 
size_t len = std::strlen(str); if (length_ + len > Capacity) std::terminate(); std::memcpy(data_ + length_, str, len); length_ += len; data_[length_] = '\0'; return *this; } FixedString& operator+=(const FixedString& other) { return (*this += other.c_str()); } operator std::string() const { return std::string(data_, length_); } operator std::string_view() const { return std::string_view(data_, length_); } template bool operator==(const T& other) const noexcept { return (std::string_view) (*this) == other; } template bool operator!=(const T& other) const noexcept { return (std::string_view) (*this) != other; } void clear() { length_ = 0; data_[0] = '\0'; } private: char data_[Capacity + 1]; // +1 for null terminator std::size_t length_; }; struct CommandLine { public: CommandLine(int _argc, char** _argv) : argc(_argc), argv(_argv) {} static std::string get_binary_directory(std::string argv0); static std::string get_working_directory(); int argc; char** argv; }; namespace Utility { template void move_to_front(std::vector& vec, Predicate pred) { auto it = std::find_if(vec.begin(), vec.end(), pred); if (it != vec.end()) { std::rotate(vec.begin(), it, it + 1); } } } #if defined(__GNUC__) #define sf_always_inline __attribute__((always_inline)) #elif defined(_MSC_VER) #define sf_always_inline __forceinline #else // do nothing for other compilers #define sf_always_inline #endif #if defined(__clang__) #define sf_assume(cond) __builtin_assume(cond) #elif defined(__GNUC__) #if __GNUC__ >= 13 #define sf_assume(cond) __attribute__((assume(cond))) #else #define sf_assume(cond) \ do \ { \ if (!(cond)) \ __builtin_unreachable(); \ } while (0) #endif #elif defined(_MSC_VER) #define sf_assume(cond) __assume(cond) #else // do nothing for other compilers #define sf_assume(cond) #endif #ifdef __GNUC__ #define sf_unreachable() __builtin_unreachable() #elif defined(_MSC_VER) #define sf_unreachable() __assume(0) #else #define sf_unreachable() #endif } // namespace Stockfish template 
struct std::hash> { std::size_t operator()(const Stockfish::FixedString& fstr) const noexcept { return Stockfish::hash_bytes(fstr.data(), fstr.size()); } }; #endif // #ifndef MISC_H_INCLUDED ================================================ FILE: src/movegen.cpp ================================================ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. Stockfish is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . 
*/ #include "movegen.h" #include #include #include "bitboard.h" #include "position.h" #if defined(USE_AVX512ICL) #include #include #include #endif namespace Stockfish { namespace { #if defined(USE_AVX512ICL) // clang-format off const __m512i AllSquares = _mm512_set_epi8( 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); // clang-format on template inline Move* splat_pawn_moves(Move* moveList, Bitboard to_bb) { assert(popcount(to_bb) <= 8); // <= 8 pawns per side const __m128i toSquares = _mm_cvtepi8_epi16(_mm512_castsi512_si128(_mm512_maskz_compress_epi8(to_bb, AllSquares))); const __m128i fromSquares = _mm_subs_epi16(toSquares, _mm_set1_epi16(offset)); const __m128i moves = _mm_or_si128(_mm_slli_epi16(fromSquares, Move::FromSqShift), _mm_slli_epi16(toSquares, Move::ToSqShift)); _mm_storeu_si128(reinterpret_cast<__m128i*>(moveList), moves); return moveList + popcount(to_bb); } inline Move* splat_moves(Move* moveList, Square from, Bitboard to_bb) { assert(popcount(to_bb) <= 32); // Q can attack up to 27 squares const __m512i fromVec = _mm512_set1_epi16(Move(from, SQUARE_ZERO).raw()); const __m512i toSquares = _mm512_cvtepi8_epi16(_mm512_castsi512_si256(_mm512_maskz_compress_epi8(to_bb, AllSquares))); const __m512i moves = _mm512_or_si512(fromVec, _mm512_slli_epi16(toSquares, Move::ToSqShift)); _mm512_storeu_si512(moveList, moves); return moveList + popcount(to_bb); } #else template inline Move* splat_pawn_moves(Move* moveList, Bitboard to_bb) { while (to_bb) { Square to = pop_lsb(to_bb); *moveList++ = Move(to - offset, to); } return moveList; } inline Move* splat_moves(Move* moveList, Square from, Bitboard to_bb) { while (to_bb) *moveList++ = Move(from, pop_lsb(to_bb)); return moveList; } #endif template Move* make_promotions(Move* moveList, [[maybe_unused]] Square to) { 
constexpr bool all = Type == EVASIONS || Type == NON_EVASIONS; if constexpr (Type == CAPTURES || all) *moveList++ = Move::make(to - D, to, QUEEN); if constexpr ((Type == CAPTURES && Enemy) || (Type == QUIETS && !Enemy) || all) { *moveList++ = Move::make(to - D, to, ROOK); *moveList++ = Move::make(to - D, to, BISHOP); *moveList++ = Move::make(to - D, to, KNIGHT); } return moveList; } template Move* generate_pawn_moves(const Position& pos, Move* moveList, Bitboard target) { constexpr Color Them = ~Us; constexpr Bitboard TRank7BB = (Us == WHITE ? Rank7BB : Rank2BB); constexpr Bitboard TRank3BB = (Us == WHITE ? Rank3BB : Rank6BB); constexpr Direction Up = pawn_push(Us); constexpr Direction UpRight = (Us == WHITE ? NORTH_EAST : SOUTH_WEST); constexpr Direction UpLeft = (Us == WHITE ? NORTH_WEST : SOUTH_EAST); const Bitboard emptySquares = ~pos.pieces(); const Bitboard enemies = Type == EVASIONS ? pos.checkers() : pos.pieces(Them); Bitboard pawnsOn7 = pos.pieces(Us, PAWN) & TRank7BB; Bitboard pawnsNotOn7 = pos.pieces(Us, PAWN) & ~TRank7BB; // Single and double pawn pushes, no promotions if constexpr (Type != CAPTURES) { Bitboard b1 = shift(pawnsNotOn7) & emptySquares; Bitboard b2 = shift(b1 & TRank3BB) & emptySquares; if constexpr (Type == EVASIONS) // Consider only blocking squares { b1 &= target; b2 &= target; } moveList = splat_pawn_moves(moveList, b1); moveList = splat_pawn_moves(moveList, b2); } // Promotions and underpromotions if (pawnsOn7) { Bitboard b1 = shift(pawnsOn7) & enemies; Bitboard b2 = shift(pawnsOn7) & enemies; Bitboard b3 = shift(pawnsOn7) & emptySquares; if constexpr (Type == EVASIONS) b3 &= target; while (b1) moveList = make_promotions(moveList, pop_lsb(b1)); while (b2) moveList = make_promotions(moveList, pop_lsb(b2)); while (b3) moveList = make_promotions(moveList, pop_lsb(b3)); } // Standard and en passant captures if constexpr (Type == CAPTURES || Type == EVASIONS || Type == NON_EVASIONS) { Bitboard b1 = shift(pawnsNotOn7) & enemies; Bitboard b2 
= shift(pawnsNotOn7) & enemies; moveList = splat_pawn_moves(moveList, b1); moveList = splat_pawn_moves(moveList, b2); if (pos.ep_square() != SQ_NONE) { assert(rank_of(pos.ep_square()) == relative_rank(Us, RANK_6)); // An en passant capture cannot resolve a discovered check if (Type == EVASIONS && (target & (pos.ep_square() + Up))) return moveList; b1 = pawnsNotOn7 & attacks_bb(pos.ep_square(), Them); assert(b1); while (b1) *moveList++ = Move::make(pop_lsb(b1), pos.ep_square()); } } return moveList; } template Move* generate_moves(const Position& pos, Move* moveList, Bitboard target) { static_assert(Pt != KING && Pt != PAWN, "Unsupported piece type in generate_moves()"); Bitboard bb = pos.pieces(Us, Pt); while (bb) { Square from = pop_lsb(bb); Bitboard b = attacks_bb(from, pos.pieces()) & target; moveList = splat_moves(moveList, from, b); } return moveList; } template Move* generate_all(const Position& pos, Move* moveList) { static_assert(Type != LEGAL, "Unsupported type in generate_all()"); const Square ksq = pos.square(Us); Bitboard target; // Skip generating non-king moves when in double check if (Type != EVASIONS || !more_than_one(pos.checkers())) { target = Type == EVASIONS ? between_bb(ksq, lsb(pos.checkers())) : Type == NON_EVASIONS ? ~pos.pieces(Us) : Type == CAPTURES ? pos.pieces(~Us) : ~pos.pieces(); // QUIETS moveList = generate_pawn_moves(pos, moveList, target); moveList = generate_moves(pos, moveList, target); moveList = generate_moves(pos, moveList, target); moveList = generate_moves(pos, moveList, target); moveList = generate_moves(pos, moveList, target); } Bitboard b = attacks_bb(ksq) & (Type == EVASIONS ? 
~pos.pieces(Us) : target); moveList = splat_moves(moveList, ksq, b); if ((Type == QUIETS || Type == NON_EVASIONS) && pos.can_castle(Us & ANY_CASTLING)) for (CastlingRights cr : {Us & KING_SIDE, Us & QUEEN_SIDE}) if (!pos.castling_impeded(cr) && pos.can_castle(cr)) *moveList++ = Move::make(ksq, pos.castling_rook_square(cr)); return moveList; } } // namespace // Generates all pseudo-legal captures plus queen promotions // Generates all pseudo-legal non-captures and underpromotions // Generates all pseudo-legal check evasions // Generates all pseudo-legal captures and non-captures // // Returns a pointer to the end of the move list. template Move* generate(const Position& pos, Move* moveList) { static_assert(Type != LEGAL, "Unsupported type in generate()"); assert((Type == EVASIONS) == bool(pos.checkers())); Color us = pos.side_to_move(); return us == WHITE ? generate_all(pos, moveList) : generate_all(pos, moveList); } // Explicit template instantiations template Move* generate(const Position&, Move*); template Move* generate(const Position&, Move*); template Move* generate(const Position&, Move*); template Move* generate(const Position&, Move*); // generate generates all the legal moves in the given position template<> Move* generate(const Position& pos, Move* moveList) { Color us = pos.side_to_move(); Bitboard pinned = pos.blockers_for_king(us) & pos.pieces(us); Square ksq = pos.square(us); Move* cur = moveList; moveList = pos.checkers() ? 
generate(pos, moveList) : generate(pos, moveList); while (cur != moveList) if (((pinned & cur->from_sq()) || cur->from_sq() == ksq || cur->type_of() == EN_PASSANT) && !pos.legal(*cur)) *cur = *(--moveList); else ++cur; return moveList; } } // namespace Stockfish ================================================ FILE: src/movegen.h ================================================ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. Stockfish is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #ifndef MOVEGEN_H_INCLUDED #define MOVEGEN_H_INCLUDED #include // IWYU pragma: keep #include #include "types.h" namespace Stockfish { class Position; enum GenType { CAPTURES, QUIETS, EVASIONS, NON_EVASIONS, LEGAL }; struct ExtMove: public Move { int value; void operator=(Move m) { data = m.raw(); } // Inhibit unwanted implicit conversions to Move // with an ambiguity that yields to a compile error. operator float() const = delete; }; inline bool operator<(const ExtMove& f, const ExtMove& s) { return f.value < s.value; } template Move* generate(const Position& pos, Move* moveList); // The MoveList struct wraps the generate() function and returns a convenient // list of moves. Using MoveList is sometimes preferable to directly calling // the lower level generate() function. 
template struct MoveList { explicit MoveList(const Position& pos) : last(generate(pos, moveList)) {} const Move* begin() const { return moveList; } const Move* end() const { return last; } size_t size() const { return last - moveList; } bool contains(Move move) const { return std::find(begin(), end(), move) != end(); } private: Move moveList[MAX_MOVES], *last; }; } // namespace Stockfish #endif // #ifndef MOVEGEN_H_INCLUDED ================================================ FILE: src/movepick.cpp ================================================ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. Stockfish is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #include "movepick.h" #include #include #include #include "bitboard.h" #include "misc.h" #include "position.h" namespace Stockfish { namespace { enum Stages { // generate main search moves MAIN_TT, CAPTURE_INIT, GOOD_CAPTURE, QUIET_INIT, GOOD_QUIET, BAD_CAPTURE, BAD_QUIET, // generate evasion moves EVASION_TT, EVASION_INIT, EVASION, // generate probcut moves PROBCUT_TT, PROBCUT_INIT, PROBCUT, // generate qsearch moves QSEARCH_TT, QCAPTURE_INIT, QCAPTURE }; // Sort moves in descending order up to and including a given limit. // The order of moves smaller than the limit is left unspecified. 
void partial_insertion_sort(ExtMove* begin, ExtMove* end, int limit) { for (ExtMove *sortedEnd = begin, *p = begin + 1; p < end; ++p) if (p->value >= limit) { ExtMove tmp = *p, *q; *p = *++sortedEnd; for (q = sortedEnd; q != begin && *(q - 1) < tmp; --q) *q = *(q - 1); *q = tmp; } } } // namespace // Constructors of the MovePicker class. As arguments, we pass information // to decide which class of moves to emit, to help sorting the (presumably) // good moves first, and how important move ordering is at the current node. // MovePicker constructor for the main search and for the quiescence search MovePicker::MovePicker(const Position& p, Move ttm, Depth d, const ButterflyHistory* mh, const LowPlyHistory* lph, const CapturePieceToHistory* cph, const PieceToHistory** ch, const SharedHistories* sh, int pl) : pos(p), mainHistory(mh), lowPlyHistory(lph), captureHistory(cph), continuationHistory(ch), sharedHistory(sh), ttMove(ttm), depth(d), ply(pl) { if (pos.checkers()) stage = EVASION_TT + !(ttm && pos.pseudo_legal(ttm)); else stage = (depth > 0 ? MAIN_TT : QSEARCH_TT) + !(ttm && pos.pseudo_legal(ttm)); } // MovePicker constructor for ProbCut: we generate captures with Static Exchange // Evaluation (SEE) greater than or equal to the given threshold. MovePicker::MovePicker(const Position& p, Move ttm, int th, const CapturePieceToHistory* cph) : pos(p), captureHistory(cph), ttMove(ttm), threshold(th) { assert(!pos.checkers()); stage = PROBCUT_TT + !(ttm && pos.capture_stage(ttm) && pos.pseudo_legal(ttm)); } // Assigns a numerical value to each move in a list, used for sorting. // Captures are ordered by Most Valuable Victim (MVV), preferring captures // with a good history. Quiets moves are ordered using the history tables. 
template ExtMove* MovePicker::score(MoveList& ml) { static_assert(Type == CAPTURES || Type == QUIETS || Type == EVASIONS, "Wrong type"); Color us = pos.side_to_move(); [[maybe_unused]] Bitboard threatByLesser[KING + 1]; if constexpr (Type == QUIETS) { threatByLesser[PAWN] = 0; threatByLesser[KNIGHT] = threatByLesser[BISHOP] = pos.attacks_by(~us); threatByLesser[ROOK] = pos.attacks_by(~us) | pos.attacks_by(~us) | threatByLesser[KNIGHT]; threatByLesser[QUEEN] = pos.attacks_by(~us) | threatByLesser[ROOK]; threatByLesser[KING] = 0; } ExtMove* it = cur; for (auto move : ml) { ExtMove& m = *it++; m = move; const Square from = m.from_sq(); const Square to = m.to_sq(); const Piece pc = pos.moved_piece(m); const PieceType pt = type_of(pc); const Piece capturedPiece = pos.piece_on(to); if constexpr (Type == CAPTURES) m.value = (*captureHistory)[pc][to][type_of(capturedPiece)] + 7 * int(PieceValue[capturedPiece]); else if constexpr (Type == QUIETS) { // histories m.value = 2 * (*mainHistory)[us][m.raw()]; m.value += 2 * sharedHistory->pawn_entry(pos)[pc][to]; m.value += (*continuationHistory[0])[pc][to]; m.value += (*continuationHistory[1])[pc][to]; m.value += (*continuationHistory[2])[pc][to]; m.value += (*continuationHistory[3])[pc][to]; m.value += (*continuationHistory[5])[pc][to]; // bonus for checks m.value += (bool(pos.check_squares(pt) & to) && pos.see_ge(m, -75)) * 16384; // penalty for moving to a square threatened by a lesser piece // or bonus for escaping an attack by a lesser piece. int v = 20 * (bool(threatByLesser[pt] & from) - bool(threatByLesser[pt] & to)); m.value += PieceValue[pt] * v; if (ply < LOW_PLY_HISTORY_SIZE) m.value += 8 * (*lowPlyHistory)[ply][m.raw()] / (1 + ply); } else // Type == EVASIONS { if (pos.capture_stage(m)) m.value = PieceValue[capturedPiece] + (1 << 28); else m.value = (*mainHistory)[us][m.raw()] + (*continuationHistory[0])[pc][to]; } } return it; } // Returns the next move satisfying a predicate function. 
// This never returns the TT move, as it was emitted before.
// NOTE(review): the template parameter list after `template` is not present in
// this copy of the file - confirm against the original source.
template Move MovePicker::select(Pred filter) {

    // Advance cur through [cur, endCur). The filter takes no argument and is
    // expected to inspect *cur itself. On a hit, cur is left one past the
    // returned move.
    for (; cur < endCur; ++cur)
        if (*cur != ttMove && filter())
            return *cur++;

    return Move::none();
}

// This is the most important method of the MovePicker class. We emit one
// new pseudo-legal move on every call until there are no more moves left,
// picking the move with the highest score from a list of generated moves.
// NOTE(review): template arguments (on MoveList, score and
// std::numeric_limits) are not present in this copy of the file.
Move MovePicker::next_move() {

    // Quiets scoring above this value are emitted in GOOD_QUIET, the rest in BAD_QUIET
    constexpr int goodQuietThreshold = -14000;
top:
    switch (stage)
    {

    // Emit the TT move first and move on to the matching *_INIT stage
    case MAIN_TT :
    case EVASION_TT :
    case QSEARCH_TT :
    case PROBCUT_TT :
        ++stage;
        return ttMove;

    // Generate and score the move list into moves[], sort all of it, then
    // re-dispatch on the next stage.
    case CAPTURE_INIT :
    case PROBCUT_INIT :
    case QCAPTURE_INIT : {
        MoveList ml(pos);

        cur = endBadCaptures = moves;
        endCur = endCaptures = score(ml);

        partial_insertion_sort(cur, endCur, std::numeric_limits::min());
        ++stage;
        goto top;
    }

    case GOOD_CAPTURE :
        // A capture is emitted now if it passes see_ge with a threshold derived
        // from its score; otherwise it is swapped into [moves, endBadCaptures)
        // to be emitted later in BAD_CAPTURE.
        if (select([&]() {
                if (pos.see_ge(*cur, -cur->value / 18))
                    return true;
                std::swap(*endBadCaptures++, *cur);
                return false;
            }))
            return *(cur - 1);

        ++stage;
        [[fallthrough]];

    case QUIET_INIT :
        if (!skipQuiets)
        {
            MoveList ml(pos);

            // score() writes starting at cur, so the scored list occupies
            // [cur, endGenerated). The sort limit scales with depth.
            endCur = endGenerated = score(ml);

            partial_insertion_sort(cur, endCur, -3560 * depth);
        }

        ++stage;
        [[fallthrough]];

    case GOOD_QUIET :
        if (!skipQuiets && select([&]() { return cur->value > goodQuietThreshold; }))
            return *(cur - 1);

        // Prepare the pointers to loop over the bad captures
        cur    = moves;
        endCur = endBadCaptures;

        ++stage;
        [[fallthrough]];

    case BAD_CAPTURE :
        if (select([]() { return true; }))
            return *(cur - 1);

        // Prepare the pointers to loop over quiets again
        cur    = endCaptures;
        endCur = endGenerated;

        ++stage;
        [[fallthrough]];

    case BAD_QUIET :
        if (!skipQuiets)
            return select([&]() { return cur->value <= goodQuietThreshold; });

        return Move::none();

    case EVASION_INIT : {
        MoveList ml(pos);

        cur    = moves;
        endCur = endGenerated = score(ml);

        partial_insertion_sort(cur, endCur, std::numeric_limits::min());
        ++stage;
        [[fallthrough]];
    }

    // Terminal stages: emit every remaining move except the TT move
    case EVASION :
    case QCAPTURE :
        return select([]() { return true; });

    // Terminal stage: emit only moves passing see_ge against the ProbCut threshold
    case PROBCUT :
        return select([&]() { return pos.see_ge(*cur, threshold); });
    }

    assert(false);
    return Move::none();  // Silence warning
}

// From now on next_move() skips quiet generation and the quiet stages
void MovePicker::skip_quiet_moves() { skipQuiets = true; }

}  // namespace Stockfish

================================================
FILE: src/movepick.h
================================================
/*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
  Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.

  Stockfish is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program.  If not, see .
*/

#ifndef MOVEPICK_H_INCLUDED
#define MOVEPICK_H_INCLUDED

#include "history.h"
#include "movegen.h"
#include "types.h"

namespace Stockfish {

class Position;

// The MovePicker class is used to pick one pseudo-legal move at a time from the
// current position. The most important method is next_move(), which emits one
// new pseudo-legal move on every call, until there are no moves left, when
// Move::none() is returned. In order to improve the efficiency of the alpha-beta
// algorithm, MovePicker attempts to return the moves which are most likely to get
// a cut-off first.
// NOTE(review): the template parameter lists of select() and score() are not
// present in this copy of the file - confirm against the original source.
class MovePicker {

   public:
    // Not copyable
    MovePicker(const MovePicker&)            = delete;
    MovePicker& operator=(const MovePicker&) = delete;

    // Constructor for the main search and the quiescence search
    MovePicker(const Position&,
               Move,
               Depth,
               const ButterflyHistory*,
               const LowPlyHistory*,
               const CapturePieceToHistory*,
               const PieceToHistory**,
               const SharedHistories*,
               int);
    // Constructor for ProbCut (the int is the SEE threshold)
    MovePicker(const Position&, Move, int, const CapturePieceToHistory*);

    // Emits the next move, or Move::none() when no moves are left
    Move next_move();
    // Makes subsequent next_move() calls skip quiet moves
    void skip_quiet_moves();

   private:
    // Returns the next move in [cur, endCur) that is not the TT move and
    // satisfies the predicate
    template Move select(Pred);
    // Scores the moves of a generated list into the moves[] buffer
    template ExtMove* score(MoveList&);
    ExtMove* begin() { return cur; }
    ExtMove* end() { return endCur; }

    const Position&              pos;
    // History tables consulted when scoring; not owned by the picker
    const ButterflyHistory*      mainHistory;
    const LowPlyHistory*         lowPlyHistory;
    const CapturePieceToHistory* captureHistory;
    const PieceToHistory**       continuationHistory;
    const SharedHistories*       sharedHistory;
    Move                         ttMove;
    // cur / endCur delimit the range currently being emitted. The other
    // pointers mark region boundaries inside moves[]: end of the rejected
    // captures, end of all captures, end of everything generated.
    ExtMove *                    cur, *endCur, *endBadCaptures, *endCaptures, *endGenerated;
    // Current stage of the next_move() state machine
    int                          stage;
    // SEE threshold, set by the ProbCut constructor
    int                          threshold;
    Depth                        depth;
    int                          ply;
    bool                         skipQuiets = false;
    // Storage for the generated and scored moves
    ExtMove                      moves[MAX_MOVES];
};

}  // namespace Stockfish

#endif  // #ifndef MOVEPICK_H_INCLUDED

================================================
FILE: src/nnue/features/full_threats.cpp
================================================
/*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
  Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.

  Stockfish is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program.  If not, see .
*/

//Definition of input features FullThreats of NNUE evaluation function

// NOTE(review): in this copy of the file the names of the system headers and
// all template parameter/argument lists (std::array, make_piece_indices_*,
// pawn_attacks_bb, shift, static_cast, ...) are not present - confirm against
// the original source before relying on the code below.

#include "full_threats.h"

#include
#include
#include
#include
#include

#include "../../bitboard.h"
#include "../../misc.h"
#include "../../position.h"
#include "../../types.h"
#include "../nnue_common.h"

namespace Stockfish::Eval::NNUE::Features {

// Per-piece totals produced by init_threat_offsets():
//   cumulativePieceOffset - sum of the attack-set sizes over all from-squares
//   cumulativeOffset      - running total accumulated over the preceding pieces
struct HelperOffsets {
    int cumulativePieceOffset, cumulativeOffset;
};

// The twelve real pieces, in the order the tables below are built
constexpr std::array AllPieces = {
  W_PAWN, W_KNIGHT, W_BISHOP, W_ROOK, W_QUEEN, W_KING,
  B_PAWN, B_KNIGHT, B_BISHOP, B_ROOK, B_QUEEN, B_KING,
};

// For a non-pawn piece type: out[from][to] is the number of squares in
// PseudoAttacks[PT][from] whose index is lower than `to`.
template constexpr auto make_piece_indices_type() {
    static_assert(PT != PieceType::PAWN);

    std::array, SQUARE_NB> out{};

    for (Square from = SQ_A1; from <= SQ_H8; ++from)
    {
        Bitboard attacks = PseudoAttacks[PT][from];

        for (Square to = SQ_A1; to <= SQ_H8; ++to)
        {
            // Mask of all squares below `to`, intersected with the attack set
            out[from][to] = constexpr_popcount(((1ULL << to) - 1) & attacks);
        }
    }

    return out;
}

// Same table for a pawn; here PseudoAttacks is indexed by the pawn's colour.
template constexpr auto make_piece_indices_piece() {
    static_assert(type_of(P) == PieceType::PAWN);

    std::array, SQUARE_NB> out{};

    constexpr Color C = color_of(P);

    for (Square from = SQ_A1; from <= SQ_H8; ++from)
    {
        Bitboard attacks = PseudoAttacks[C][from];

        for (Square to = SQ_A1; to <= SQ_H8; ++to)
        {
            out[from][to] = constexpr_popcount(((1ULL << to) - 1) & attacks);
        }
    }

    return out;
}

// Assembles the [piece][from][to] table from the per-type tables above.
// Both colours of a non-pawn piece type share one table.
constexpr auto index_lut2_array() {
    constexpr auto KNIGHT_ATTACKS = make_piece_indices_type();
    constexpr auto BISHOP_ATTACKS = make_piece_indices_type();
    constexpr auto ROOK_ATTACKS   = make_piece_indices_type();
    constexpr auto QUEEN_ATTACKS  = make_piece_indices_type();
    constexpr auto KING_ATTACKS   = make_piece_indices_type();

    std::array, SQUARE_NB>, PIECE_NB> indices{};

    indices[W_PAWN] = make_piece_indices_piece();
    indices[B_PAWN] = make_piece_indices_piece();

    indices[W_KNIGHT] = KNIGHT_ATTACKS;
    indices[B_KNIGHT] = KNIGHT_ATTACKS;

    indices[W_BISHOP] = BISHOP_ATTACKS;
    indices[B_BISHOP] = BISHOP_ATTACKS;

    indices[W_ROOK] = ROOK_ATTACKS;
    indices[B_ROOK] = ROOK_ATTACKS;

    indices[W_QUEEN] = QUEEN_ATTACKS;
    indices[B_QUEEN] = QUEEN_ATTACKS;

    indices[W_KING] = KING_ATTACKS;
    indices[B_KING] = KING_ATTACKS;

    return indices;
}

// Builds, for every piece:
//   offsets[piece][from] - sum of the attack-set sizes of all squares before `from`
//   indices[piece]       - the HelperOffsets totals for that piece
constexpr auto init_threat_offsets() {
    std::array indices{};
    std::array, PIECE_NB> offsets{};

    int cumulativeOffset = 0;
    for (Piece piece : AllPieces)
    {
        int pieceIdx              = piece;
        int cumulativePieceOffset = 0;

        for (Square from = SQ_A1; from <= SQ_H8; ++from)
        {
            offsets[pieceIdx][from] = cumulativePieceOffset;

            if (type_of(piece) != PAWN)
            {
                Bitboard attacks = PseudoAttacks[type_of(piece)][from];
                cumulativePieceOffset += constexpr_popcount(attacks);
            }

            // Pawns only contribute for from-squares in [SQ_A2, SQ_H7]
            else if (from >= SQ_A2 && from <= SQ_H7)
            {
                // pieceIdx < 8 selects the first branch; presumably white
                // pieces (both branches look identical here because the
                // template arguments are missing - verify).
                Bitboard attacks = (pieceIdx < 8) ? pawn_attacks_bb(square_bb(from))
                                                  : pawn_attacks_bb(square_bb(from));
                cumulativePieceOffset += constexpr_popcount(attacks);
            }
        }

        indices[pieceIdx] = {cumulativePieceOffset, cumulativeOffset};

        // Each piece takes numValidTargets[piece] blocks of cumulativePieceOffset entries
        cumulativeOffset += numValidTargets[pieceIdx] * cumulativePieceOffset;
    }

    return std::pair{indices, offsets};
}

constexpr auto helper_offsets = init_threat_offsets().first;
// Lookup array for indexing threats
constexpr auto offsets = init_threat_offsets().second;

// Builds the [attacker][attacked][from < to] base index table. Entries equal
// to FullThreats::Dimensions mark pairs that must not produce a feature.
constexpr auto init_index_luts() {
    std::array, PIECE_NB>, PIECE_NB> indices{};

    for (Piece attacker : AllPieces)
    {
        for (Piece attacked : AllPieces)
        {
            // True when the two piece codes differ exactly in the colour bit (8)
            bool      enemy        = (attacker ^ attacked) == 8;
            PieceType attackerType = type_of(attacker);
            PieceType attackedType = type_of(attacked);

            // Slot of the attacked type among the attacker's targets; -1 = excluded
            int  map           = FullThreats::map[attackerType - 1][attackedType - 1];
            // Same piece type on both ends (except pawn attacking a friendly
            // pawn): kept only in the [0] slot, i.e. when !(from < to).
            bool semi_excluded = attackerType == attackedType && (enemy || attackerType != PAWN);
            IndexType feature  = helper_offsets[attacker].cumulativeOffset
                              + (color_of(attacked) * (numValidTargets[attacker] / 2) + map)
                                  * helper_offsets[attacker].cumulativePieceOffset;

            bool excluded                  = map < 0;
            indices[attacker][attacked][0] = excluded ? FullThreats::Dimensions : feature;
            indices[attacker][attacked][1] =
              excluded || semi_excluded ? FullThreats::Dimensions : feature;
        }
    }

    return indices;
}

// The final index is calculated from summing data found in these two LUTs, as well
// as offsets[attacker][from]

// [attacker][attacked][from < to]
constexpr auto index_lut1 = init_index_luts();
// [attacker][from][to]
constexpr auto index_lut2 = index_lut2_array();

// Index of a feature for a given king position and another piece on some square
// A result >= Dimensions means "no feature"; callers in this file test
// `index < Dimensions` before using it.
inline sf_always_inline IndexType FullThreats::make_index(
  Color perspective, Piece attacker, Square from, Square to, Piece attacked, Square ksq) {
    // Squares are XORed with a king-dependent table entry, and with 56 for
    // perspective 1.
    const std::int8_t orientation   = OrientTBL[ksq] ^ (56 * perspective);
    unsigned          from_oriented = uint8_t(from) ^ orientation;
    unsigned          to_oriented   = uint8_t(to) ^ orientation;

    // Piece codes get their colour bit (8) toggled for perspective 1
    std::int8_t swap              = 8 * perspective;
    unsigned    attacker_oriented = attacker ^ swap;
    unsigned    attacked_oriented = attacked ^ swap;

    return index_lut1[attacker_oriented][attacked_oriented][from_oriented < to_oriented]
         + offsets[attacker_oriented][from_oriented]
         + index_lut2[attacker_oriented][from_oriented][to_oriented];
}

// Get a list of indices for active features in ascending order
void FullThreats::append_active_indices(Color perspective, const Position& pos, IndexList& active) {
    Square   ksq      = pos.square(perspective);
    Bitboard occupied = pos.pieces();

    for (Color color : {WHITE, BLACK})
    {
        for (PieceType pt = PAWN; pt <= KING; ++pt)
        {
            // Colours are visited relative to the perspective: own side first
            Color    c        = Color(perspective ^ color);
            Piece    attacker = make_piece(c, pt);
            Bitboard bb       = pos.pieces(c, pt);

            if (pt == PAWN)
            {
                // Pawn attacks are produced for all pawns at once by shifting
                // the pawn bitboard, keeping only occupied target squares.
                auto right = (c == WHITE) ? NORTH_EAST : SOUTH_WEST;
                auto left  = (c == WHITE) ? NORTH_WEST : SOUTH_EAST;
                auto attacks_left =
                  ((c == WHITE) ? shift(bb) : shift(bb)) & occupied;
                auto attacks_right =
                  ((c == WHITE) ? shift(bb) : shift(bb)) & occupied;

                while (attacks_left)
                {
                    Square    to       = pop_lsb(attacks_left);
                    // The origin is recovered by undoing the shift direction
                    Square    from     = to - right;
                    Piece     attacked = pos.piece_on(to);
                    IndexType index = make_index(perspective, attacker, from, to, attacked, ksq);

                    if (index < Dimensions)
                        active.push_back(index);
                }

                while (attacks_right)
                {
                    Square    to       = pop_lsb(attacks_right);
                    Square    from     = to - left;
                    Piece     attacked = pos.piece_on(to);
                    IndexType index = make_index(perspective, attacker, from, to, attacked, ksq);

                    if (index < Dimensions)
                        active.push_back(index);
                }
            }
            else
            {
                // Other pieces: per piece, every occupied square it attacks
                while (bb)
                {
                    Square   from    = pop_lsb(bb);
                    Bitboard attacks = (attacks_bb(pt, from, occupied)) & occupied;

                    while (attacks)
                    {
                        Square    to       = pop_lsb(attacks);
                        Piece     attacked = pos.piece_on(to);
                        IndexType index =
                          make_index(perspective, attacker, from, to, attacked, ksq);

                        if (index < Dimensions)
                            active.push_back(index);
                    }
                }
            }
        }
    }
}

// Get a list of indices for recently changed features
// Each entry of diff.list is turned into one index, appended to `added` or
// `removed` according to its add() flag. When prefetchBase is non-null the
// address computed from the index and prefetchStride is prefetched as well.
void FullThreats::append_changed_indices(Color                   perspective,
                                         Square                  ksq,
                                         const DiffType&         diff,
                                         IndexList&              removed,
                                         IndexList&              added,
                                         FusedUpdateData*        fusedData,
                                         bool                    first,
                                         const ThreatWeightType* prefetchBase,
                                         IndexType               prefetchStride) {

    for (const auto& dirty : diff.list)
    {
        auto attacker = dirty.pc();
        auto attacked = dirty.threatened_pc();
        auto from     = dirty.pc_sq();
        auto to       = dirty.threatened_sq();
        auto add      = dirty.add();

        // Fused updates: entries touching the square fusedData->dp2removed are
        // filtered. With `first` set, additions are recorded in the
        // corresponding board and skipped; removals are skipped when their
        // counterpart square is already recorded.
        // NOTE(review): presumably this cancels add/remove pairs across two
        // consecutive updates - confirm against the caller.
        if (fusedData)
        {
            if (from == fusedData->dp2removed)
            {
                if (add)
                {
                    if (first)
                    {
                        fusedData->dp2removedOriginBoard |= to;
                        continue;
                    }
                }
                else if (fusedData->dp2removedOriginBoard & to)
                    continue;
            }

            if (to != SQ_NONE && to == fusedData->dp2removed)
            {
                if (add)
                {
                    if (first)
                    {
                        fusedData->dp2removedTargetBoard |= from;
                        continue;
                    }
                }
                else if (fusedData->dp2removedTargetBoard & from)
                    continue;
            }
        }

        auto&           insert = add ? added : removed;
        const IndexType index  = make_index(perspective, attacker, from, to, attacked, ksq);

        if (index < Dimensions)
        {
            if (prefetchBase)
                prefetch(prefetchBase + static_cast(index) * prefetchStride);

            insert.push_back(index);
        }
    }
}

// True when the diff belongs to this perspective and bit 2 (0b100) of the
// king square differs between the new and the previous king square.
bool FullThreats::requires_refresh(const DiffType& diff, Color perspective) {
    return perspective == diff.us && (int8_t(diff.ksq) & 0b100) != (int8_t(diff.prevKsq) & 0b100);
}

}  // namespace Stockfish::Eval::NNUE::Features

================================================
FILE: src/nnue/features/full_threats.h
================================================
/*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
  Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.

  Stockfish is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program.  If not, see .
*/

//Definition of input features FullThreats of NNUE evaluation function

// NOTE(review): the name of the system header and the template argument of
// ValueList are not present in this copy of the file - confirm against the
// original source.

#ifndef NNUE_FEATURES_FULL_THREATS_INCLUDED
#define NNUE_FEATURES_FULL_THREATS_INCLUDED

#include

#include "../../misc.h"
#include "../../types.h"
#include "../nnue_common.h"

namespace Stockfish {
class Position;
}

namespace Stockfish::Eval::NNUE::Features {

// Indexed by piece code. Number of index blocks reserved per attacking piece;
// entries that are not pawn..queen codes are 0.
static constexpr int numValidTargets[PIECE_NB] = {0, 6, 10, 8, 8, 10, 0, 0,
                                                  0, 6, 10, 8, 8, 10, 0, 0};

class FullThreats {
   public:
    // Feature name
    static constexpr const char* Name = "Full_Threats(Friend)";

    // Hash value embedded in the evaluation file
    static constexpr std::uint32_t HashValue = 0x8f234cb8u;

    // Number of feature dimensions
    static constexpr IndexType Dimensions = 60144;

    // clang-format off
    // Orient a square according to perspective (rotates by 180 for black)
    // NOTE(review): the table itself depends only on the king's file (first
    // four columns SQ_A1, last four SQ_H1); the perspective part is applied
    // separately by the user of the table - confirm the wording above.
    static constexpr std::int8_t OrientTBL[SQUARE_NB] = {
        SQ_A1, SQ_A1, SQ_A1, SQ_A1, SQ_H1, SQ_H1, SQ_H1, SQ_H1,
        SQ_A1, SQ_A1, SQ_A1, SQ_A1, SQ_H1, SQ_H1, SQ_H1, SQ_H1,
        SQ_A1, SQ_A1, SQ_A1, SQ_A1, SQ_H1, SQ_H1, SQ_H1, SQ_H1,
        SQ_A1, SQ_A1, SQ_A1, SQ_A1, SQ_H1, SQ_H1, SQ_H1, SQ_H1,
        SQ_A1, SQ_A1, SQ_A1, SQ_A1, SQ_H1, SQ_H1, SQ_H1, SQ_H1,
        SQ_A1, SQ_A1, SQ_A1, SQ_A1, SQ_H1, SQ_H1, SQ_H1, SQ_H1,
        SQ_A1, SQ_A1, SQ_A1, SQ_A1, SQ_H1, SQ_H1, SQ_H1, SQ_H1,
        SQ_A1, SQ_A1, SQ_A1, SQ_A1, SQ_H1, SQ_H1, SQ_H1, SQ_H1,
    };

    // Indexed [attacker type - 1][attacked type - 1]: slot of the attacked
    // piece type among the attacker's targets, or -1 when that pair is not a
    // feature. The last row (all -1) excludes its attacker type entirely.
    static constexpr int map[PIECE_TYPE_NB-2][PIECE_TYPE_NB-2] = {
      { 0,  1, -1,  2, -1, -1},
      { 0,  1,  2,  3,  4, -1},
      { 0,  1,  2,  3, -1, -1},
      { 0,  1,  2,  3, -1, -1},
      { 0,  1,  2,  3,  4, -1},
      {-1, -1, -1, -1, -1, -1}
    };
    // clang-format on

    // State carried between the two halves of a fused update; the boards
    // start empty. dp2removed has no default initializer.
    struct FusedUpdateData {
        Bitboard dp2removedOriginBoard = 0;
        Bitboard dp2removedTargetBoard = 0;

        Square dp2removed;
    };

    // Maximum number of simultaneously active features.
    static constexpr IndexType MaxActiveDimensions = 128;
    using IndexList                                = ValueList;
    using DiffType                                 = DirtyThreats;

    // Index of the feature for `attkr` on `from` threatening `attkd` on `to`,
    // seen from `perspective` with that side's king on `ksq`
    static IndexType make_index(Color  perspective,
                                Piece  attkr,
                                Square from,
                                Square to,
                                Piece  attkd,
                                Square ksq);

    // Get a list of indices for active features
    static void append_active_indices(Color perspective, const Position& pos, IndexList& active);

    // Get a list of indices for recently changed features
    static void append_changed_indices(Color                   perspective,
                                       Square                  ksq,
                                       const DiffType&         diff,
                                       IndexList&              removed,
                                       IndexList&              added,
                                       FusedUpdateData*        fd             = nullptr,
                                       bool                    first          = false,
                                       const ThreatWeightType* prefetchBase   = nullptr,
                                       IndexType               prefetchStride = 0);

    // Returns whether the change stored in this DirtyPiece means
    // that a full accumulator refresh is required.
    static bool requires_refresh(const DiffType& diff, Color perspective);
};

}  // namespace Stockfish::Eval::NNUE::Features

#endif  // #ifndef NNUE_FEATURES_FULL_THREATS_INCLUDED

================================================
FILE: src/nnue/features/half_ka_v2_hm.cpp
================================================
/*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
  Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.

  Stockfish is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program.  If not, see .
*/

//Definition of input features HalfKAv2_hm of NNUE evaluation function

#include "half_ka_v2_hm.h"

#include "../../bitboard.h"
#include "../../position.h"
#include "../../types.h"
#include "../nnue_common.h"

namespace Stockfish::Eval::NNUE::Features {

#if defined(USE_AVX512ICL)

// Vectorised computation of all removed/added feature indices at once.
// removedBB / addedBB select the squares; the piece on each selected square is
// looked up in oldPieces / newPieces respectively.
// NOTE(review): the template arguments of std::array are not present in this
// copy of the file - confirm against the original source.
void HalfKAv2_hm::write_indices(const std::array& oldPieces,
                                const std::array& newPieces,
                                Bitboard          removedBB,
                                Bitboard          addedBB,
                                Color             perspective,
                                Square            ksq,
                                IndexList&        removed,
                                IndexList&        added) {

    // Reserve exactly one output slot per selected square
    auto* write_removed = removed.make_space(popcount(removedBB));
    auto* write_added   = added.make_space(popcount(addedBB));

    const __m512i vecOldPieces = _mm512_loadu_si512(oldPieces.data());
    const __m512i vecNewPieces = _mm512_loadu_si512(newPieces.data());

    // Piece code -> piece-square base, one row per perspective. The first 16
    // entries mirror PieceSquareIndex; the rest pad the row to 32 lanes.
    static constexpr uint16_t psiTable[COLOR_NB][32] = {
      {PS_NONE, PS_W_PAWN, PS_W_KNIGHT, PS_W_BISHOP, PS_W_ROOK, PS_W_QUEEN, PS_KING, PS_NONE,
       PS_NONE, PS_B_PAWN, PS_B_KNIGHT, PS_B_BISHOP, PS_B_ROOK, PS_B_QUEEN, PS_KING, PS_NONE,
       PS_NONE, PS_NONE,   PS_NONE,     PS_NONE,     PS_NONE,   PS_NONE,    PS_NONE, PS_NONE,
       PS_NONE, PS_NONE,   PS_NONE,     PS_NONE,     PS_NONE,   PS_NONE,    PS_NONE, PS_NONE},
      {PS_NONE, PS_B_PAWN, PS_B_KNIGHT, PS_B_BISHOP, PS_B_ROOK, PS_B_QUEEN, PS_KING, PS_NONE,
       PS_NONE, PS_W_PAWN, PS_W_KNIGHT, PS_W_BISHOP, PS_W_ROOK, PS_W_QUEEN, PS_KING, PS_NONE,
       PS_NONE, PS_NONE,   PS_NONE,     PS_NONE,     PS_NONE,   PS_NONE,    PS_NONE, PS_NONE,
       PS_NONE, PS_NONE,   PS_NONE,     PS_NONE,     PS_NONE,   PS_NONE,    PS_NONE, PS_NONE}};

    const __m512i psi = _mm512_loadu_si512(psiTable[perspective]);

    // Byte lane i holds the value i (the arguments are listed high lane first)
    const __m512i allSquares = _mm512_set_epi8(
      63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43, 42, 41,
      40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18,
      17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);

    // Same orientation and king-bucket terms as the scalar make_index()
    const uint16_t flip   = 56 * perspective;
    const __m512i  orient = _mm512_set1_epi16((uint16_t) OrientTBL[ksq] ^ flip);
    const __m512i  bucket = _mm512_set1_epi16((uint16_t) KingBuckets[int(ksq) ^ flip]);

    // Removed side: pack the selected square numbers to the low lanes, fetch
    // the piece on each, widen both to 16 bit, then combine
    // (square ^ orient) + psi[piece] + bucket.
    __m512i removed_squares = _mm512_maskz_compress_epi8(removedBB, allSquares);
    __m512i removed_pieces  = _mm512_permutexvar_epi8(removed_squares, vecOldPieces);
    removed_squares         = _mm512_cvtepi8_epi16(_mm512_castsi512_si256(removed_squares));
    removed_pieces          = _mm512_cvtepi8_epi16(_mm512_castsi512_si256(removed_pieces));
    const __m512i removed_psi = _mm512_permutexvar_epi16(removed_pieces, psi);
    __m512i       removed_indices = _mm512_xor_si512(removed_squares, orient);
    removed_indices               = _mm512_add_epi16(removed_indices, removed_psi);
    removed_indices               = _mm512_add_epi16(removed_indices, bucket);

    // Added side: identical pipeline on addedBB / newPieces
    __m512i added_squares = _mm512_maskz_compress_epi8(addedBB, allSquares);
    __m512i added_pieces  = _mm512_permutexvar_epi8(added_squares, vecNewPieces);
    added_squares         = _mm512_cvtepi8_epi16(_mm512_castsi512_si256(added_squares));
    added_pieces          = _mm512_cvtepi8_epi16(_mm512_castsi512_si256(added_pieces));
    const __m512i added_psi = _mm512_permutexvar_epi16(added_pieces, psi);
    __m512i       added_indices = _mm512_xor_si512(added_squares, orient);
    added_indices               = _mm512_add_epi16(added_indices, added_psi);
    added_indices               = _mm512_add_epi16(added_indices, bucket);

    // Widen the 16-bit results to 32 bit and store both halves. Two full
    // vectors (32 entries) are written regardless of how many slots were
    // reserved above.
    // NOTE(review): presumably the lists have capacity for the full 32 entries -
    // confirm against IndexList / make_space.
    const __m512i removed_indices0 =
      _mm512_cvtepi16_epi32(_mm512_castsi512_si256(removed_indices));
    const __m512i removed_indices1 =
      _mm512_cvtepi16_epi32(_mm512_extracti64x4_epi64(removed_indices, 1));
    _mm512_storeu_si512(write_removed, removed_indices0);
    _mm512_storeu_si512(write_removed + 16, removed_indices1);

    const __m512i added_indices0 = _mm512_cvtepi16_epi32(_mm512_castsi512_si256(added_indices));
    const __m512i added_indices1 =
      _mm512_cvtepi16_epi32(_mm512_extracti64x4_epi64(added_indices, 1));
    _mm512_storeu_si512(write_added, added_indices0);
    _mm512_storeu_si512(write_added + 16, added_indices1);
}

#endif

// Index of a feature for a given king position and another piece on some square
// Sum of three terms: the oriented square, the piece-square base for the
// piece as seen from `perspective`, and the bucket of the (flipped) king square.
IndexType HalfKAv2_hm::make_index(Color perspective, Square s, Piece pc, Square ksq) {
    const IndexType flip = 56 * perspective;
    return (IndexType(s) ^ OrientTBL[ksq] ^ flip) + PieceSquareIndex[perspective][pc]
         + KingBuckets[int(ksq) ^ flip];
}

// Get a list of indices for active features
// One index per occupied square, in pop_lsb() order of the occupancy bitboard.
void HalfKAv2_hm::append_active_indices(Color perspective, const Position& pos, IndexList& active) {
    Square ksq = pos.square(perspective);

    Bitboard bb = pos.pieces();
    while (bb)
    {
        Square s = pop_lsb(bb);
        active.push_back(make_index(perspective, s, pos.piece_on(s), ksq));
    }
}

// Get a list of indices for recently changed features
void HalfKAv2_hm::append_changed_indices(
  Color perspective, Square ksq, const DiffType& diff, IndexList& removed, IndexList& added) {
    // The moved piece always leaves diff.from
    removed.push_back(make_index(perspective, diff.from, diff.pc, ksq));
    // ... and reappears on diff.to unless that is SQ_NONE
    if (diff.to != SQ_NONE)
        added.push_back(make_index(perspective, diff.to, diff.pc, ksq));

    // Optional second removed piece
    if (diff.remove_sq != SQ_NONE)
        removed.push_back(make_index(perspective, diff.remove_sq, diff.remove_pc, ksq));

    // Optional second added piece
    if (diff.add_sq != SQ_NONE)
        added.push_back(make_index(perspective, diff.add_sq, diff.add_pc, ksq));
}

// A refresh is required exactly when the moved piece is this perspective's king
bool HalfKAv2_hm::requires_refresh(const DiffType& diff, Color perspective) {
    return diff.pc == make_piece(perspective, KING);
}

}  // namespace Stockfish::Eval::NNUE::Features

================================================
FILE: src/nnue/features/half_ka_v2_hm.h
================================================
/*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
  Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.

  Stockfish is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program.  If not, see .
*/

//Definition of input features HalfKAv2_hm of NNUE evaluation function

// NOTE(review): the names of the system headers and several template
// arguments (static_cast, ValueList, std::array) are not present in this copy
// of the file - confirm against the original source.

#ifndef NNUE_FEATURES_HALF_KA_V2_HM_H_INCLUDED
#define NNUE_FEATURES_HALF_KA_V2_HM_H_INCLUDED

#include

#include "../../misc.h"
#include "../../types.h"
#include "../nnue_common.h"

namespace Stockfish {
class Position;
}

namespace Stockfish::Eval::NNUE::Features {

// Feature HalfKAv2_hm: Combination of the position of own king and the
// position of pieces. Position mirrored such that king is always on e..h files.
class HalfKAv2_hm {

    // Unique number for each piece type on each square
    // Each value is the start of a block of SQUARE_NB indices. PS_NONE shares
    // the value 0 with PS_W_PAWN, and both kings share PS_KING.
    enum {
        PS_NONE     = 0,
        PS_W_PAWN   = 0,
        PS_B_PAWN   = 1 * SQUARE_NB,
        PS_W_KNIGHT = 2 * SQUARE_NB,
        PS_B_KNIGHT = 3 * SQUARE_NB,
        PS_W_BISHOP = 4 * SQUARE_NB,
        PS_B_BISHOP = 5 * SQUARE_NB,
        PS_W_ROOK   = 6 * SQUARE_NB,
        PS_B_ROOK   = 7 * SQUARE_NB,
        PS_W_QUEEN  = 8 * SQUARE_NB,
        PS_B_QUEEN  = 9 * SQUARE_NB,
        PS_KING     = 10 * SQUARE_NB,
        PS_NB       = 11 * SQUARE_NB
    };

    // Indexed [perspective][piece code]
    static constexpr IndexType PieceSquareIndex[COLOR_NB][PIECE_NB] = {
      // Convention: W - us, B - them
      // Viewed from other side, W and B are reversed
      {PS_NONE, PS_W_PAWN, PS_W_KNIGHT, PS_W_BISHOP, PS_W_ROOK, PS_W_QUEEN, PS_KING, PS_NONE,
       PS_NONE, PS_B_PAWN, PS_B_KNIGHT, PS_B_BISHOP, PS_B_ROOK, PS_B_QUEEN, PS_KING, PS_NONE},
      {PS_NONE, PS_B_PAWN, PS_B_KNIGHT, PS_B_BISHOP, PS_B_ROOK, PS_B_QUEEN, PS_KING, PS_NONE,
       PS_NONE, PS_W_PAWN, PS_W_KNIGHT, PS_W_BISHOP, PS_W_ROOK, PS_W_QUEEN, PS_KING, PS_NONE}};

   public:
    // Feature name
    static constexpr const char* Name = "HalfKAv2_hm(Friend)";

    // Hash value embedded in the evaluation file
    static constexpr std::uint32_t HashValue = 0x7f234cb8u;

    // Number of feature dimensions
    static constexpr IndexType Dimensions =
      static_cast(SQUARE_NB) * static_cast(PS_NB) / 2;

// Helper for the table below: bucket number -> index offset of that bucket
#define B(v) (v * PS_NB)
    // clang-format off
    // Indexed by king square; each row is symmetric left-to-right, giving
    // 32 distinct buckets for the 64 squares.
    static constexpr IndexType KingBuckets[SQUARE_NB] = {
      B(28), B(29), B(30), B(31), B(31), B(30), B(29), B(28),
      B(24), B(25), B(26), B(27), B(27), B(26), B(25), B(24),
      B(20), B(21), B(22), B(23), B(23), B(22), B(21), B(20),
      B(16), B(17), B(18), B(19), B(19), B(18), B(17), B(16),
      B(12), B(13), B(14), B(15), B(15), B(14), B(13), B(12),
      B( 8), B( 9), B(10), B(11), B(11), B(10), B( 9), B( 8),
      B( 4), B( 5), B( 6), B( 7), B( 7), B( 6), B( 5), B( 4),
      B( 0), B( 1), B( 2), B( 3), B( 3), B( 2), B( 1), B( 0),
    };
    // clang-format on
#undef B
    // clang-format off
    // Orient a square according to perspective (rotates by 180 for black)
    // NOTE(review): the table itself depends only on the king's file (first
    // four columns SQ_H1, last four SQ_A1); the perspective part is applied
    // separately by the user of the table - confirm the wording above.
    static constexpr IndexType OrientTBL[SQUARE_NB] = {
      SQ_H1, SQ_H1, SQ_H1, SQ_H1, SQ_A1, SQ_A1, SQ_A1, SQ_A1,
      SQ_H1, SQ_H1, SQ_H1, SQ_H1, SQ_A1, SQ_A1, SQ_A1, SQ_A1,
      SQ_H1, SQ_H1, SQ_H1, SQ_H1, SQ_A1, SQ_A1, SQ_A1, SQ_A1,
      SQ_H1, SQ_H1, SQ_H1, SQ_H1, SQ_A1, SQ_A1, SQ_A1, SQ_A1,
      SQ_H1, SQ_H1, SQ_H1, SQ_H1, SQ_A1, SQ_A1, SQ_A1, SQ_A1,
      SQ_H1, SQ_H1, SQ_H1, SQ_H1, SQ_A1, SQ_A1, SQ_A1, SQ_A1,
      SQ_H1, SQ_H1, SQ_H1, SQ_H1, SQ_A1, SQ_A1, SQ_A1, SQ_A1,
      SQ_H1, SQ_H1, SQ_H1, SQ_H1, SQ_A1, SQ_A1, SQ_A1, SQ_A1 ,
    };
    // clang-format on

    // Maximum number of simultaneously active features.
    static constexpr IndexType MaxActiveDimensions = 32;
    using IndexList                                = ValueList;
    using DiffType                                 = DirtyPiece;

#if defined(USE_AVX512ICL)
    // Compute all changed feature indices and write them to the given lists
    static void write_indices(const std::array& oldPieces,
                              const std::array& newPieces,
                              Bitboard          removedBB,
                              Bitboard          addedBB,
                              Color             perspective,
                              Square            ksq,
                              IndexList&        removed,
                              IndexList&        added);
#endif

    // Index of a feature for a given king position and another piece on some square
    static IndexType make_index(Color perspective, Square s, Piece pc, Square ksq);

    // Get a list of indices for active features
    static void append_active_indices(Color perspective, const Position& pos, IndexList& active);

    // Get a list of indices for recently changed features
    static void append_changed_indices(
      Color perspective, Square ksq, const DiffType& diff, IndexList& removed, IndexList& added);

    // Returns whether the change stored in this DirtyPiece means
    // that a full accumulator refresh is required.
    static bool requires_refresh(const DiffType& diff, Color perspective);
};

}  // namespace Stockfish::Eval::NNUE::Features

#endif  // #ifndef NNUE_FEATURES_HALF_KA_V2_HM_H_INCLUDED

================================================
FILE: src/nnue/layers/affine_transform.h
================================================
/*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
  Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.

  Stockfish is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program.  If not, see .
*/

// Definition of layer AffineTransform of NNUE evaluation function

#ifndef NNUE_LAYERS_AFFINE_TRANSFORM_H_INCLUDED
#define NNUE_LAYERS_AFFINE_TRANSFORM_H_INCLUDED

#include
#include

#include "../../memory.h"
#include "../nnue_common.h"
#include "../simd.h"

/*
  This file contains the definition for a fully connected layer (aka affine transform).

    - expected use-case is for when PaddedInputDimensions == 32 and InputDimensions <= 32.
      - that's why AVX512 is hard to implement
    - expected use-case is small layers
    - inputs are processed in chunks of 4, weights are respectively transposed
    - accumulation happens directly to int32s
*/

namespace Stockfish::Eval::NNUE::Layers {

// The sequential optimised path is enabled when SSSE3 or NEON dot-product is available
#if defined(USE_SSSE3) || defined(USE_NEON_DOTPROD)
    #define ENABLE_SEQ_OPT
#endif

// Fallback implementation for older/other architectures.
// Requires the input to be padded to at least 16 values.
#ifndef ENABLE_SEQ_OPT template static void affine_transform_non_ssse3(std::int32_t* output, const std::int8_t* weights, const std::int32_t* biases, const std::uint8_t* input) { #if defined(USE_SSE2) || defined(USE_NEON) #if defined(USE_SSE2) // At least a multiple of 16, with SSE2. constexpr IndexType NumChunks = ceil_to_multiple(InputDimensions, 16) / 16; const __m128i Zeros = _mm_setzero_si128(); const auto inputVector = reinterpret_cast(input); #elif defined(USE_NEON) constexpr IndexType NumChunks = ceil_to_multiple(InputDimensions, 16) / 16; const auto inputVector = reinterpret_cast(input); #endif for (IndexType i = 0; i < OutputDimensions; ++i) { const IndexType offset = i * PaddedInputDimensions; #if defined(USE_SSE2) __m128i sumLo = _mm_cvtsi32_si128(biases[i]); __m128i sumHi = Zeros; const auto row = reinterpret_cast(&weights[offset]); for (IndexType j = 0; j < NumChunks; ++j) { __m128i row_j = _mm_load_si128(&row[j]); __m128i input_j = _mm_load_si128(&inputVector[j]); __m128i extendedRowLo = _mm_srai_epi16(_mm_unpacklo_epi8(row_j, row_j), 8); __m128i extendedRowHi = _mm_srai_epi16(_mm_unpackhi_epi8(row_j, row_j), 8); __m128i extendedInputLo = _mm_unpacklo_epi8(input_j, Zeros); __m128i extendedInputHi = _mm_unpackhi_epi8(input_j, Zeros); __m128i productLo = _mm_madd_epi16(extendedRowLo, extendedInputLo); __m128i productHi = _mm_madd_epi16(extendedRowHi, extendedInputHi); sumLo = _mm_add_epi32(sumLo, productLo); sumHi = _mm_add_epi32(sumHi, productHi); } __m128i sum = _mm_add_epi32(sumLo, sumHi); __m128i sumHigh_64 = _mm_shuffle_epi32(sum, _MM_SHUFFLE(1, 0, 3, 2)); sum = _mm_add_epi32(sum, sumHigh_64); __m128i sum_second_32 = _mm_shufflelo_epi16(sum, _MM_SHUFFLE(1, 0, 3, 2)); sum = _mm_add_epi32(sum, sum_second_32); output[i] = _mm_cvtsi128_si32(sum); #elif defined(USE_NEON) int32x4_t sum = {biases[i]}; const auto row = reinterpret_cast(&weights[offset]); for (IndexType j = 0; j < NumChunks; ++j) { int16x8_t product = vmull_s8(inputVector[j * 2], row[j * 
2]); product = vmlal_s8(product, inputVector[j * 2 + 1], row[j * 2 + 1]); sum = vpadalq_s16(sum, product); } output[i] = SIMD::neon_m128_reduce_add_epi32(sum); #endif } #else std::memcpy(output, biases, sizeof(std::int32_t) * OutputDimensions); // Traverse weights in transpose order to take advantage of input sparsity for (IndexType i = 0; i < InputDimensions; ++i) if (input[i]) { const std::int8_t* w = &weights[i]; const int in = input[i]; for (IndexType j = 0; j < OutputDimensions; ++j) output[j] += w[j * PaddedInputDimensions] * in; } #endif } #endif // !ENABLE_SEQ_OPT template class AffineTransform { public: // Input/output type using InputType = std::uint8_t; using OutputType = std::int32_t; // Number of input/output dimensions static constexpr IndexType InputDimensions = InDims; static constexpr IndexType OutputDimensions = OutDims; static constexpr IndexType PaddedInputDimensions = ceil_to_multiple(InputDimensions, MaxSimdWidth); static constexpr IndexType PaddedOutputDimensions = ceil_to_multiple(OutputDimensions, MaxSimdWidth); using OutputBuffer = OutputType[PaddedOutputDimensions]; // Hash value embedded in the evaluation file static constexpr std::uint32_t get_hash_value(std::uint32_t prevHash) { std::uint32_t hashValue = 0xCC03DAE4u; hashValue += OutputDimensions; hashValue ^= prevHash >> 1; hashValue ^= prevHash << 31; return hashValue; } static constexpr IndexType get_weight_index_scrambled(IndexType i) { return (i / 4) % (PaddedInputDimensions / 4) * OutputDimensions * 4 + i / PaddedInputDimensions * 4 + i % 4; } static constexpr IndexType get_weight_index(IndexType i) { #ifdef ENABLE_SEQ_OPT return get_weight_index_scrambled(i); #else return i; #endif } // Read network parameters bool read_parameters(std::istream& stream) { read_little_endian(stream, biases, OutputDimensions); for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i) weights[get_weight_index(i)] = read_little_endian(stream); return !stream.fail(); } // Write network 
parameters bool write_parameters(std::ostream& stream) const { write_little_endian(stream, biases, OutputDimensions); for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i) write_little_endian(stream, weights[get_weight_index(i)]); return !stream.fail(); } std::size_t get_content_hash() const { std::size_t h = 0; hash_combine(h, get_raw_data_hash(biases)); hash_combine(h, get_raw_data_hash(weights)); hash_combine(h, get_hash_value(0)); return h; } // Forward propagation void propagate(const InputType* input, OutputType* output) const { #ifdef ENABLE_SEQ_OPT if constexpr (OutputDimensions > 1) { #if defined(USE_AVX512) using vec_t = __m512i; #define vec_set_32 _mm512_set1_epi32 #define vec_add_dpbusd_32 SIMD::m512_add_dpbusd_epi32 #elif defined(USE_AVX2) using vec_t = __m256i; #define vec_set_32 _mm256_set1_epi32 #define vec_add_dpbusd_32 SIMD::m256_add_dpbusd_epi32 #elif defined(USE_SSSE3) using vec_t = __m128i; #define vec_set_32 _mm_set1_epi32 #define vec_add_dpbusd_32 SIMD::m128_add_dpbusd_epi32 #elif defined(USE_NEON_DOTPROD) using vec_t = int32x4_t; #define vec_set_32 vdupq_n_s32 #define vec_add_dpbusd_32(acc, a, b) \ SIMD::dotprod_m128_add_dpbusd_epi32(acc, vreinterpretq_s8_s32(a), \ vreinterpretq_s8_s32(b)) #endif static constexpr IndexType OutputSimdWidth = sizeof(vec_t) / sizeof(OutputType); static_assert(OutputDimensions % OutputSimdWidth == 0); constexpr IndexType NumChunks = ceil_to_multiple(InputDimensions, 8) / 4; constexpr IndexType NumRegs = OutputDimensions / OutputSimdWidth; const vec_t* biasvec = reinterpret_cast(biases); vec_t acc[NumRegs]; for (IndexType k = 0; k < NumRegs; ++k) acc[k] = biasvec[k]; for (IndexType i = 0; i < NumChunks; ++i) { const vec_t in0 = vec_set_32(load_as(input + i * sizeof(std::int32_t))); const auto col0 = reinterpret_cast(&weights[i * OutputDimensions * 4]); for (IndexType k = 0; k < NumRegs; ++k) vec_add_dpbusd_32(acc[k], in0, col0[k]); } vec_t* outptr = reinterpret_cast(output); for (IndexType k = 
0; k < NumRegs; ++k) outptr[k] = acc[k]; #undef vec_set_32 #undef vec_add_dpbusd_32 } else if constexpr (OutputDimensions == 1) { // We cannot use AVX512 for the last layer because there are only 32 inputs // and the buffer is not padded to 64 elements. #if defined(USE_AVX2) using vec_t = __m256i; #define vec_setzero() _mm256_setzero_si256() #define vec_add_dpbusd_32 SIMD::m256_add_dpbusd_epi32 #define vec_hadd SIMD::m256_hadd #elif defined(USE_SSSE3) using vec_t = __m128i; #define vec_setzero() _mm_setzero_si128() #define vec_add_dpbusd_32 SIMD::m128_add_dpbusd_epi32 #define vec_hadd SIMD::m128_hadd #elif defined(USE_NEON_DOTPROD) using vec_t = int32x4_t; #define vec_setzero() vdupq_n_s32(0) #define vec_add_dpbusd_32(acc, a, b) \ SIMD::dotprod_m128_add_dpbusd_epi32(acc, vreinterpretq_s8_s32(a), \ vreinterpretq_s8_s32(b)) #define vec_hadd SIMD::neon_m128_hadd #endif const auto inputVector = reinterpret_cast(input); static constexpr IndexType InputSimdWidth = sizeof(vec_t) / sizeof(InputType); static_assert(PaddedInputDimensions % InputSimdWidth == 0); constexpr IndexType NumChunks = PaddedInputDimensions / InputSimdWidth; vec_t sum0 = vec_setzero(); const auto row0 = reinterpret_cast(&weights[0]); for (int j = 0; j < int(NumChunks); ++j) { const vec_t in = inputVector[j]; vec_add_dpbusd_32(sum0, in, row0[j]); } output[0] = vec_hadd(sum0, biases[0]); #undef vec_setzero #undef vec_add_dpbusd_32 #undef vec_hadd } #else // Use old implementation for the other architectures. 
affine_transform_non_ssse3( output, weights, biases, input); #endif } private: using BiasType = OutputType; using WeightType = std::int8_t; alignas(CacheLineSize) BiasType biases[OutputDimensions]; alignas(CacheLineSize) WeightType weights[OutputDimensions * PaddedInputDimensions]; }; } // namespace Stockfish::Eval::NNUE::Layers #endif // #ifndef NNUE_LAYERS_AFFINE_TRANSFORM_H_INCLUDED ================================================ FILE: src/nnue/layers/affine_transform_sparse_input.h ================================================ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. Stockfish is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ // Definition of layer AffineTransformSparseInput of NNUE evaluation function #ifndef NNUE_LAYERS_AFFINE_TRANSFORM_SPARSE_INPUT_H_INCLUDED #define NNUE_LAYERS_AFFINE_TRANSFORM_SPARSE_INPUT_H_INCLUDED #include #include #include #include #include "../../bitboard.h" #include "../../memory.h" #include "../simd.h" #include "../nnue_common.h" /* This file contains the definition for a fully connected layer (aka affine transform) with block sparse input. 
*/ namespace Stockfish::Eval::NNUE::Layers { #if (USE_SSSE3 | (USE_NEON >= 8)) static constexpr int lsb_index64[64] = { 0, 47, 1, 56, 48, 27, 2, 60, 57, 49, 41, 37, 28, 16, 3, 61, 54, 58, 35, 52, 50, 42, 21, 44, 38, 32, 29, 23, 17, 11, 4, 62, 46, 55, 26, 59, 40, 36, 15, 53, 34, 51, 20, 43, 31, 22, 10, 45, 25, 39, 14, 33, 19, 30, 9, 24, 13, 18, 8, 12, 7, 6, 5, 63}; constexpr int constexpr_lsb(uint64_t bb) { assert(bb != 0); constexpr uint64_t debruijn64 = 0x03F79D71B4CB0A89ULL; return lsb_index64[((bb ^ (bb - 1)) * debruijn64) >> 58]; } alignas(CacheLineSize) static constexpr struct OffsetIndices { std::uint16_t offset_indices[256][8]; constexpr OffsetIndices() : offset_indices() { for (int i = 0; i < 256; ++i) { std::uint64_t j = i, k = 0; while (j) { offset_indices[i][k++] = constexpr_lsb(j); j &= j - 1; } while (k < 8) offset_indices[i][k++] = 0; } } } Lookup; #if defined(__GNUC__) || defined(__clang__) #define RESTRICT __restrict__ #elif defined(_MSC_VER) #define RESTRICT __restrict #else #define RESTRICT #endif // Find indices of nonzero 32-bit values in a packed byte buffer. // The input pointer addresses a sequence of 32-bit blocks stored in a // std::uint8_t array. 
template void find_nnz(const std::uint8_t* RESTRICT input, std::uint16_t* RESTRICT out, IndexType& count_out) { #if defined(USE_AVX512ICL) constexpr IndexType SimdWidthIn = 64; // 512 bits constexpr IndexType SimdWidthOut = 32; // 512 bits / 16 bits constexpr IndexType NumChunks = InputDimensions / SimdWidthOut; const __m512i increment = _mm512_set1_epi16(SimdWidthOut); __m512i base = _mm512_set_epi16( // Same permute order as _mm512_packus_epi32() 31, 30, 29, 28, 15, 14, 13, 12, 27, 26, 25, 24, 11, 10, 9, 8, 23, 22, 21, 20, 7, 6, 5, 4, 19, 18, 17, 16, 3, 2, 1, 0); IndexType count = 0; for (IndexType i = 0; i < NumChunks; ++i) { const __m512i inputV0 = _mm512_load_si512(input + i * 2 * SimdWidthIn); const __m512i inputV1 = _mm512_load_si512(input + i * 2 * SimdWidthIn + SimdWidthIn); // Get a bitmask and gather non zero indices const __m512i inputV01 = _mm512_packus_epi32(inputV0, inputV1); const __mmask32 nnzMask = _mm512_test_epi16_mask(inputV01, inputV01); // Avoid _mm512_mask_compressstoreu_epi16() as it's 256 uOps on Zen4 __m512i nnz = _mm512_maskz_compress_epi16(nnzMask, base); _mm512_storeu_si512(out + count, nnz); count += popcount(nnzMask); base = _mm512_add_epi16(base, increment); } count_out = count; #elif defined(USE_AVX512) constexpr IndexType SimdWidth = 16; // 512 bits / 32 bits constexpr IndexType NumChunks = InputDimensions / SimdWidth; const __m512i increment = _mm512_set1_epi32(SimdWidth); __m512i base = _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); IndexType count = 0; for (IndexType i = 0; i < NumChunks; ++i) { const __m512i inputV = _mm512_load_si512(input + i * SimdWidth * sizeof(std::uint32_t)); // Get a bitmask and gather non zero indices const __mmask16 nnzMask = _mm512_test_epi32_mask(inputV, inputV); const __m512i nnzV = _mm512_maskz_compress_epi32(nnzMask, base); _mm512_mask_cvtepi32_storeu_epi16(out + count, 0xFFFF, nnzV); count += popcount(nnzMask); base = _mm512_add_epi32(base, increment); } count_out = 
count; #else using namespace SIMD; constexpr IndexType InputSimdWidth = sizeof(vec_uint_t) / sizeof(std::int32_t); // Outputs are processed 8 elements at a time, even if the SIMD width is narrower constexpr IndexType ChunkSize = 8; constexpr IndexType NumChunks = InputDimensions / ChunkSize; constexpr IndexType InputsPerChunk = ChunkSize / InputSimdWidth; static_assert(InputsPerChunk > 0 && "SIMD width too wide"); const auto inputVector = reinterpret_cast(input); IndexType count = 0; vec128_t base = vec128_zero; const vec128_t increment = vec128_set_16(8); for (IndexType i = 0; i < NumChunks; ++i) { // bitmask of nonzero values in this chunk unsigned nnz = 0; for (IndexType j = 0; j < InputsPerChunk; ++j) { const vec_uint_t inputChunk = inputVector[i * InputsPerChunk + j]; nnz |= unsigned(vec_nnz(inputChunk)) << (j * InputSimdWidth); } const vec128_t offsets = vec128_load(reinterpret_cast(&Lookup.offset_indices[nnz])); vec128_storeu(reinterpret_cast(out + count), vec128_add(base, offsets)); count += popcount(nnz); base = vec128_add(base, increment); } count_out = count; #endif } #endif // Sparse input implementation template class AffineTransformSparseInput { public: // Input/output type using InputType = std::uint8_t; using OutputType = std::int32_t; // Number of input/output dimensions static constexpr IndexType InputDimensions = InDims; static constexpr IndexType OutputDimensions = OutDims; static_assert(OutputDimensions % 16 == 0, "Only implemented for OutputDimensions divisible by 16."); static constexpr IndexType PaddedInputDimensions = ceil_to_multiple(InputDimensions, MaxSimdWidth); static constexpr IndexType PaddedOutputDimensions = ceil_to_multiple(OutputDimensions, MaxSimdWidth); #if (USE_SSSE3 | (USE_NEON >= 8)) static constexpr IndexType ChunkSize = 4; #else static constexpr IndexType ChunkSize = 1; #endif using OutputBuffer = OutputType[PaddedOutputDimensions]; // Hash value embedded in the evaluation file static constexpr std::uint32_t 
get_hash_value(std::uint32_t prevHash) { std::uint32_t hashValue = 0xCC03DAE4u; hashValue += OutputDimensions; hashValue ^= prevHash >> 1; hashValue ^= prevHash << 31; return hashValue; } static constexpr IndexType get_weight_index_scrambled(IndexType i) { return (i / ChunkSize) % (PaddedInputDimensions / ChunkSize) * OutputDimensions * ChunkSize + i / PaddedInputDimensions * ChunkSize + i % ChunkSize; } static constexpr IndexType get_weight_index(IndexType i) { #if (USE_SSSE3 | (USE_NEON >= 8)) return get_weight_index_scrambled(i); #else return i; #endif } // Read network parameters bool read_parameters(std::istream& stream) { read_little_endian(stream, biases, OutputDimensions); for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i) weights[get_weight_index(i)] = read_little_endian(stream); return !stream.fail(); } // Write network parameters bool write_parameters(std::ostream& stream) const { write_little_endian(stream, biases, OutputDimensions); for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i) write_little_endian(stream, weights[get_weight_index(i)]); return !stream.fail(); } std::size_t get_content_hash() const { std::size_t h = 0; hash_combine(h, get_raw_data_hash(biases)); hash_combine(h, get_raw_data_hash(weights)); hash_combine(h, get_hash_value(0)); return h; } // Forward propagation void propagate(const InputType* input, OutputType* output) const { #if (USE_SSSE3 | (USE_NEON >= 8)) #if defined(USE_AVX512) using invec_t = __m512i; using outvec_t = __m512i; #define vec_add_32 _mm512_add_epi32 #define vec_set_32 _mm512_set1_epi32 #define vec_add_dpbusd_32 SIMD::m512_add_dpbusd_epi32 #elif defined(USE_AVX2) using invec_t = __m256i; using outvec_t = __m256i; #define vec_add_32 _mm256_add_epi32 #define vec_set_32 _mm256_set1_epi32 #define vec_add_dpbusd_32 SIMD::m256_add_dpbusd_epi32 #elif defined(USE_SSSE3) using invec_t = __m128i; using outvec_t = __m128i; #define vec_set_32 _mm_set1_epi32 #define vec_add_dpbusd_32 
SIMD::m128_add_dpbusd_epi32 #elif defined(USE_NEON_DOTPROD) using invec_t = int8x16_t; using outvec_t = int32x4_t; #define vec_set_32(a) vreinterpretq_s8_u32(vdupq_n_u32(a)) #define vec_add_dpbusd_32 SIMD::dotprod_m128_add_dpbusd_epi32 #elif defined(USE_NEON) using invec_t = int8x16_t; using outvec_t = int32x4_t; #define vec_set_32(a) vreinterpretq_s8_u32(vdupq_n_u32(a)) #define vec_add_dpbusd_32 SIMD::neon_m128_add_dpbusd_epi32 #endif constexpr IndexType OutputSimdWidth = sizeof(outvec_t) / sizeof(OutputType); constexpr IndexType NumChunks = ceil_to_multiple(InputDimensions, 8) / ChunkSize; constexpr IndexType NumAccums = OutputDimensions / OutputSimdWidth; // If we're using high-latency dot product instructions, split the accumulators // to create 3 separate dependency chains and merge at the end constexpr IndexType NumRegs = #if defined(USE_VNNI) 3 * NumAccums; #else NumAccums; #endif std::uint16_t nnz[NumChunks]; IndexType count; // Find indices of nonzero 32-bit blocks find_nnz(input, nnz, count); const outvec_t* biasvec = reinterpret_cast(biases); outvec_t acc[NumRegs]; for (IndexType k = 0; k < NumAccums; ++k) acc[k] = biasvec[k]; const auto* start = nnz; const auto* end = nnz + count; // convince GCC to not do weird pointer arithmetic in the following loop const std::int8_t* weights_cp = weights; #if defined(USE_VNNI) for (IndexType k = NumAccums; k < NumRegs; ++k) acc[k] = vec_zero(); while (start < end - 2) { const std::ptrdiff_t i0 = *start++; const std::ptrdiff_t i1 = *start++; const std::ptrdiff_t i2 = *start++; const invec_t in0 = vec_set_32(load_as(input + i0 * sizeof(std::int32_t))); const invec_t in1 = vec_set_32(load_as(input + i1 * sizeof(std::int32_t))); const invec_t in2 = vec_set_32(load_as(input + i2 * sizeof(std::int32_t))); const auto col0 = reinterpret_cast(&weights_cp[i0 * OutputDimensions * ChunkSize]); const auto col1 = reinterpret_cast(&weights_cp[i1 * OutputDimensions * ChunkSize]); const auto col2 = reinterpret_cast(&weights_cp[i2 * 
OutputDimensions * ChunkSize]); for (IndexType k = 0; k < NumAccums; ++k) { vec_add_dpbusd_32(acc[k], in0, col0[k]); vec_add_dpbusd_32(acc[k + NumAccums], in1, col1[k]); vec_add_dpbusd_32(acc[k + 2 * NumAccums], in2, col2[k]); } } for (IndexType k = 0; k < NumAccums; ++k) acc[k] = vec_add_32(vec_add_32(acc[k], acc[k + NumAccums]), acc[k + 2 * NumAccums]); #endif while (start < end) { const std::ptrdiff_t i = *start++; const invec_t in = vec_set_32(load_as(input + i * sizeof(std::int32_t))); const auto col = reinterpret_cast(&weights_cp[i * OutputDimensions * ChunkSize]); for (IndexType k = 0; k < NumAccums; ++k) vec_add_dpbusd_32(acc[k], in, col[k]); } outvec_t* outptr = reinterpret_cast(output); for (IndexType k = 0; k < NumAccums; ++k) outptr[k] = acc[k]; #undef vec_set_32 #undef vec_add_dpbusd_32 #ifdef vec_add_32 #undef vec_add_32 #endif #else // Use dense implementation for the other architectures. affine_transform_non_ssse3( output, weights, biases, input); #endif } private: using BiasType = OutputType; using WeightType = std::int8_t; alignas(CacheLineSize) BiasType biases[OutputDimensions]; alignas(CacheLineSize) WeightType weights[OutputDimensions * PaddedInputDimensions]; }; } // namespace Stockfish::Eval::NNUE::Layers #endif // #ifndef NNUE_LAYERS_AFFINE_TRANSFORM_SPARSE_INPUT_H_INCLUDED ================================================ FILE: src/nnue/layers/clipped_relu.h ================================================ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. 
Stockfish is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ // Definition of layer ClippedReLU of NNUE evaluation function #ifndef NNUE_LAYERS_CLIPPED_RELU_H_INCLUDED #define NNUE_LAYERS_CLIPPED_RELU_H_INCLUDED #include #include #include #include "../nnue_common.h" namespace Stockfish::Eval::NNUE::Layers { // Clipped ReLU template class ClippedReLU { public: // Input/output type using InputType = std::int32_t; using OutputType = std::uint8_t; // Number of input/output dimensions static constexpr IndexType InputDimensions = InDims; static constexpr IndexType OutputDimensions = InputDimensions; static constexpr IndexType PaddedOutputDimensions = ceil_to_multiple(OutputDimensions, 32); using OutputBuffer = OutputType[PaddedOutputDimensions]; // Hash value embedded in the evaluation file static constexpr std::uint32_t get_hash_value(std::uint32_t prevHash) { std::uint32_t hashValue = 0x538D24C7u; hashValue += prevHash; return hashValue; } // Read network parameters bool read_parameters(std::istream&) { return true; } // Write network parameters bool write_parameters(std::ostream&) const { return true; } std::size_t get_content_hash() const { std::size_t h = 0; hash_combine(h, get_hash_value(0)); return h; } // Forward propagation void propagate(const InputType* input, OutputType* output) const { #if defined(USE_AVX2) if constexpr (InputDimensions % SimdWidth == 0) { constexpr IndexType NumChunks = InputDimensions / SimdWidth; const __m256i Offsets = _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0); const auto in = reinterpret_cast(input); const auto out = reinterpret_cast<__m256i*>(output); for (IndexType i = 0; i < NumChunks; ++i) { const __m256i words0 = 
_mm256_srli_epi16(_mm256_packus_epi32(_mm256_load_si256(&in[i * 4 + 0]), _mm256_load_si256(&in[i * 4 + 1])), WeightScaleBits); const __m256i words1 = _mm256_srli_epi16(_mm256_packus_epi32(_mm256_load_si256(&in[i * 4 + 2]), _mm256_load_si256(&in[i * 4 + 3])), WeightScaleBits); _mm256_store_si256(&out[i], _mm256_permutevar8x32_epi32( _mm256_packs_epi16(words0, words1), Offsets)); } } else { constexpr IndexType NumChunks = InputDimensions / (SimdWidth / 2); const auto in = reinterpret_cast(input); const auto out = reinterpret_cast<__m128i*>(output); for (IndexType i = 0; i < NumChunks; ++i) { const __m128i words0 = _mm_srli_epi16( _mm_packus_epi32(_mm_load_si128(&in[i * 4 + 0]), _mm_load_si128(&in[i * 4 + 1])), WeightScaleBits); const __m128i words1 = _mm_srli_epi16( _mm_packus_epi32(_mm_load_si128(&in[i * 4 + 2]), _mm_load_si128(&in[i * 4 + 3])), WeightScaleBits); _mm_store_si128(&out[i], _mm_packs_epi16(words0, words1)); } } constexpr IndexType Start = InputDimensions % SimdWidth == 0 ? InputDimensions / SimdWidth * SimdWidth : InputDimensions / (SimdWidth / 2) * (SimdWidth / 2); #elif defined(USE_SSE2) constexpr IndexType NumChunks = InputDimensions / SimdWidth; #ifndef USE_SSE41 const __m128i k0x80s = _mm_set1_epi8(-128); #endif const auto in = reinterpret_cast(input); const auto out = reinterpret_cast<__m128i*>(output); for (IndexType i = 0; i < NumChunks; ++i) { #if defined(USE_SSE41) const __m128i words0 = _mm_srli_epi16( _mm_packus_epi32(_mm_load_si128(&in[i * 4 + 0]), _mm_load_si128(&in[i * 4 + 1])), WeightScaleBits); const __m128i words1 = _mm_srli_epi16( _mm_packus_epi32(_mm_load_si128(&in[i * 4 + 2]), _mm_load_si128(&in[i * 4 + 3])), WeightScaleBits); _mm_store_si128(&out[i], _mm_packs_epi16(words0, words1)); #else const __m128i words0 = _mm_srai_epi16( _mm_packs_epi32(_mm_load_si128(&in[i * 4 + 0]), _mm_load_si128(&in[i * 4 + 1])), WeightScaleBits); const __m128i words1 = _mm_srai_epi16( _mm_packs_epi32(_mm_load_si128(&in[i * 4 + 2]), _mm_load_si128(&in[i 
* 4 + 3])), WeightScaleBits); const __m128i packedbytes = _mm_packs_epi16(words0, words1); _mm_store_si128(&out[i], _mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s)); #endif } constexpr IndexType Start = NumChunks * SimdWidth; #elif defined(USE_NEON) constexpr IndexType NumChunks = InputDimensions / (SimdWidth / 2); const SIMD::vec_i8x8_t Zero = {0}; const auto in = reinterpret_cast(input); const auto out = reinterpret_cast(output); for (IndexType i = 0; i < NumChunks; ++i) { int16x8_t shifted; const auto pack = reinterpret_cast(&shifted); pack[0] = vqshrn_n_s32(in[i * 2 + 0], WeightScaleBits); pack[1] = vqshrn_n_s32(in[i * 2 + 1], WeightScaleBits); out[i] = vmax_s8(vqmovn_s16(shifted), Zero); } constexpr IndexType Start = NumChunks * (SimdWidth / 2); #else constexpr IndexType Start = 0; #endif for (IndexType i = Start; i < InputDimensions; ++i) { output[i] = static_cast(std::clamp(input[i] >> WeightScaleBits, 0, 127)); } } }; } // namespace Stockfish::Eval::NNUE::Layers #endif // NNUE_LAYERS_CLIPPED_RELU_H_INCLUDED ================================================ FILE: src/nnue/layers/sqr_clipped_relu.h ================================================ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. Stockfish is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . 
*/ // Definition of layer ClippedReLU of NNUE evaluation function #ifndef NNUE_LAYERS_SQR_CLIPPED_RELU_H_INCLUDED #define NNUE_LAYERS_SQR_CLIPPED_RELU_H_INCLUDED #include #include #include #include "../nnue_common.h" namespace Stockfish::Eval::NNUE::Layers { // Clipped ReLU template class SqrClippedReLU { public: // Input/output type using InputType = std::int32_t; using OutputType = std::uint8_t; // Number of input/output dimensions static constexpr IndexType InputDimensions = InDims; static constexpr IndexType OutputDimensions = InputDimensions; static constexpr IndexType PaddedOutputDimensions = ceil_to_multiple(OutputDimensions, 32); using OutputBuffer = OutputType[PaddedOutputDimensions]; // Hash value embedded in the evaluation file static constexpr std::uint32_t get_hash_value(std::uint32_t prevHash) { std::uint32_t hashValue = 0x538D24C7u; hashValue += prevHash; return hashValue; } // Read network parameters bool read_parameters(std::istream&) { return true; } // Write network parameters bool write_parameters(std::ostream&) const { return true; } std::size_t get_content_hash() const { std::size_t h = 0; hash_combine(h, get_hash_value(0)); return h; } // Forward propagation void propagate(const InputType* input, OutputType* output) const { #if defined(USE_SSE2) constexpr IndexType NumChunks = InputDimensions / 16; static_assert(WeightScaleBits == 6); const auto in = reinterpret_cast(input); const auto out = reinterpret_cast<__m128i*>(output); for (IndexType i = 0; i < NumChunks; ++i) { __m128i words0 = _mm_packs_epi32(_mm_load_si128(&in[i * 4 + 0]), _mm_load_si128(&in[i * 4 + 1])); __m128i words1 = _mm_packs_epi32(_mm_load_si128(&in[i * 4 + 2]), _mm_load_si128(&in[i * 4 + 3])); // We shift by WeightScaleBits * 2 = 12 and divide by 128 // which is an additional shift-right of 7, meaning 19 in total. // MulHi strips the lower 16 bits so we need to shift out 3 more to match. 
words0 = _mm_srli_epi16(_mm_mulhi_epi16(words0, words0), 3); words1 = _mm_srli_epi16(_mm_mulhi_epi16(words1, words1), 3); _mm_store_si128(&out[i], _mm_packs_epi16(words0, words1)); } constexpr IndexType Start = NumChunks * 16; #else constexpr IndexType Start = 0; #endif for (IndexType i = Start; i < InputDimensions; ++i) { output[i] = static_cast( // Really should be /127 but we need to make it fast so we right-shift // by an extra 7 bits instead. Needs to be accounted for in the trainer. std::min(127ll, ((long long) (input[i]) * input[i]) >> (2 * WeightScaleBits + 7))); } } }; } // namespace Stockfish::Eval::NNUE::Layers #endif // NNUE_LAYERS_SQR_CLIPPED_RELU_H_INCLUDED ================================================ FILE: src/nnue/network.cpp ================================================ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. Stockfish is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #include "network.h" #include #include #include #include #include #include #define INCBIN_SILENCE_BITCODE_WARNING #include "../incbin/incbin.h" #include "../evaluate.h" #include "../misc.h" #include "../position.h" #include "../types.h" #include "nnue_architecture.h" #include "nnue_common.h" #include "nnue_misc.h" // Macro to embed the default efficiently updatable neural network (NNUE) file // data in the engine binary (using incbin.h, by Dale Weiler). 
// For each of the two embedded nets (Big and Small) the macro invocation will
// declare the following three variables
//     const unsigned char        gEmbeddedNNUE{Big,Small}Data[];  // a pointer to the embedded data
//     const unsigned char *const gEmbeddedNNUE{Big,Small}End;     // a marker to the end
//     const unsigned int         gEmbeddedNNUE{Big,Small}Size;    // the size of the embedded file
// Note that this does not work in Microsoft Visual Studio.
#if !defined(_MSC_VER) && !defined(NNUE_EMBEDDING_OFF)
INCBIN(EmbeddedNNUEBig, EvalFileDefaultNameBig);
INCBIN(EmbeddedNNUESmall, EvalFileDefaultNameSmall);
#else
// Nothing is embedded in this configuration: define one-byte placeholders under
// the same names, since get_embedded() below refers to them unconditionally.
const unsigned char        gEmbeddedNNUEBigData[1]   = {0x0};
const unsigned char* const gEmbeddedNNUEBigEnd       = &gEmbeddedNNUEBigData[1];
const unsigned int         gEmbeddedNNUEBigSize      = 1;
const unsigned char        gEmbeddedNNUESmallData[1] = {0x0};
const unsigned char* const gEmbeddedNNUESmallEnd     = &gEmbeddedNNUESmallData[1];
const unsigned int         gEmbeddedNNUESmallSize    = 1;
#endif

namespace {

// Plain descriptor of one embedded network image: start pointer, end marker
// and size, exactly as passed to the constructor.
struct EmbeddedNNUE {
    EmbeddedNNUE(const unsigned char* embeddedData,
                 const unsigned char* embeddedEnd,
                 const unsigned int   embeddedSize) :
        data(embeddedData),
        end(embeddedEnd),
        size(embeddedSize) {}
    const unsigned char* data;
    const unsigned char* end;
    const unsigned int   size;
};

using namespace Stockfish::Eval::NNUE;

// Returns the descriptor of the big embedded net for EmbeddedNNUEType::BIG and
// of the small embedded net for any other value.
EmbeddedNNUE get_embedded(EmbeddedNNUEType type) {
    if (type == EmbeddedNNUEType::BIG)
        return EmbeddedNNUE(gEmbeddedNNUEBigData, gEmbeddedNNUEBigEnd, gEmbeddedNNUEBigSize);
    else
        return EmbeddedNNUE(gEmbeddedNNUESmallData, gEmbeddedNNUESmallEnd, gEmbeddedNNUESmallSize);
}

}


namespace Stockfish::Eval::NNUE {

// NOTE(review): throughout this file template parameter/argument lists are
// missing (e.g. a bare "template", "read_little_endian(stream)",
// "static_cast(...)"). Presumably text in angle brackets was lost from this
// copy - confirm against the upstream file before compiling.

namespace Detail {

// Read evaluation function parameters
// Reads one header value from the stream and compares it with
// T::get_hash_value(); only when the stream is good and the values match is
// `reference` asked to read its own parameters. Returns false otherwise.
template
bool read_parameters(std::istream& stream, T& reference) {

    std::uint32_t header;
    header = read_little_endian(stream);
    if (!stream || header != T::get_hash_value())
        return false;
    return reference.read_parameters(stream);
}

// Write evaluation function parameters
// Writes T::get_hash_value() as a header, then lets `reference` write its own
// parameters and returns that call's result.
template
bool write_parameters(std::ostream& stream, const T& reference) {

    write_little_endian(stream, T::get_hash_value());
    return reference.write_parameters(stream);
}

}  // namespace Detail

// Tries to load the net named evalfilePath (the default name when empty).
// The candidate directories are visited in order; a candidate is only tried
// while evalFile.current still differs from the requested name, so the first
// successful load wins and the remaining candidates are skipped.
template
void Network::load(const std::string& rootDirectory, std::string evalfilePath) {
    // NOTE(review): in this copy the first two entries of dirs and the literals
    // compared against inside the loop are all empty strings, so the two
    // branches cannot tell those entries apart. Presumably one of them was a
    // distinct sentinel standing for the embedded net - verify against upstream.
#if defined(DEFAULT_NNUE_DIRECTORY)
    std::vector dirs = {"", "", rootDirectory,
                        stringify(DEFAULT_NNUE_DIRECTORY)};
#else
    std::vector dirs = {"", "", rootDirectory};
#endif

    if (evalfilePath.empty())
        evalfilePath = evalFile.defaultName;

    for (const auto& directory : dirs)
    {
        if (std::string(evalFile.current) != evalfilePath)
        {
            if (directory != "")
            {
                load_user_net(directory, evalfilePath);
            }

            // The embedded net is only considered when the default name is requested
            if (directory == "" && evalfilePath == std::string(evalFile.defaultName))
            {
                load_internal();
            }
        }
    }
}


// Saves the current net to `filename`, or to the default file name when no
// filename is given. Without a filename, saving is refused (with a message)
// unless the currently loaded net is the one with the default name.
// The outcome is reported on sync_cout and returned.
template
bool Network::save(const std::optional& filename) const {
    std::string actualFilename;
    std::string msg;

    if (filename.has_value())
        actualFilename = filename.value();
    else
    {
        if (std::string(evalFile.current) != std::string(evalFile.defaultName))
        {
            msg = "Failed to export a net. "
                  "A non-embedded net can only be saved if the filename is specified";

            sync_cout << msg << sync_endl;
            return false;
        }

        actualFilename = evalFile.defaultName;
    }

    std::ofstream stream(actualFilename, std::ios_base::binary);
    bool          saved = save(stream, evalFile.current, evalFile.netDescription);

    msg = saved ? "Network saved successfully to " + actualFilename : "Failed to export a net";

    sync_cout << msg << sync_endl;
    return saved;
}


// Evaluates the position with the layer stack selected by `bucket`.
// The feature transformer fills transformedFeatures and returns the psqt
// term; the selected layer stack produces the positional term. Both are
// divided by OutputScale before being returned.
template
NetworkOutput
Network::evaluate(const Position&                         pos,
                  AccumulatorStack&                       accumulatorStack,
                  AccumulatorCaches::Cache& cache) const {

    constexpr uint64_t alignment = CacheLineSize;

    alignas(alignment)
      TransformedFeatureType transformedFeatures[FeatureTransformer::BufferSize];

    ASSERT_ALIGNED(transformedFeatures, alignment);

    // Index of the layer stack to use, derived from pos.count()
    const int  bucket = (pos.count() - 1) / 4;
    const auto psqt =
      featureTransformer.transform(pos, accumulatorStack, cache, transformedFeatures, bucket);
    const auto positional = network[bucket].propagate(transformedFeatures);
    return {static_cast(psqt / OutputScale), static_cast(positional / OutputScale)};
}


// Checks that the net named evalfilePath (the default name when empty) is the
// one currently loaded. If it is not, an explanatory multi-line error is
// passed to `f` (when `f` is set) and the process is terminated with
// exit(EXIT_FAILURE). If it is, a one-line summary of the net (name, size in
// MiB, layer dimensions) is passed to `f` when `f` is set.
template
void Network::verify(std::string                  evalfilePath,
                     const std::function& f) const {
    if (evalfilePath.empty())
        evalfilePath = evalFile.defaultName;

    if (std::string(evalFile.current) != evalfilePath)
    {
        if (f)
        {
            std::string msg1 =
              "Network evaluation parameters compatible with the engine must be available.";
            std::string msg2 = "The network file " + evalfilePath + " was not loaded successfully.";
            std::string msg3 = "The UCI option EvalFile might need to specify the full path, "
                               "including the directory name, to the network file.";
            std::string msg4 = "The default net can be downloaded from: "
                               "https://tests.stockfishchess.org/api/nn/"
                             + std::string(evalFile.defaultName);
            std::string msg5 = "The engine will be terminated now.";

            std::string msg = "ERROR: " + msg1 + '\n' + "ERROR: " + msg2 + '\n' + "ERROR: " + msg3
                            + '\n' + "ERROR: " + msg4 + '\n' + "ERROR: " + msg5 + '\n';

            f(msg);
        }

        // Terminates regardless of whether a callback was supplied
        exit(EXIT_FAILURE);
    }

    if (f)
    {
        size_t size = sizeof(featureTransformer) + sizeof(Arch) * LayerStacks;
        f("NNUE evaluation using " + evalfilePath + " (" + std::to_string(size / (1024 * 1024))
          + "MiB, (" + std::to_string(featureTransformer.TotalInputDimensions) + ", "
          + std::to_string(network[0].TransformedFeatureDimensions) + ", "
          + std::to_string(network[0].FC_0_OUTPUTS) + ", " + std::to_string(network[0].FC_1_OUTPUTS)
          + ", 1))");
    }
}


// Like evaluate(), but runs every layer stack and records the psqt and
// positional terms per bucket; t.correctBucket holds the bucket that
// evaluate() would have used for this position.
template
NnueEvalTrace
Network::trace_evaluate(const Position&                         pos,
                        AccumulatorStack&                       accumulatorStack,
                        AccumulatorCaches::Cache& cache) const {

    constexpr uint64_t alignment = CacheLineSize;

    alignas(alignment)
      TransformedFeatureType transformedFeatures[FeatureTransformer::BufferSize];

    ASSERT_ALIGNED(transformedFeatures, alignment);

    NnueEvalTrace t{};
    t.correctBucket = (pos.count() - 1) / 4;
    for (IndexType bucket = 0; bucket < LayerStacks; ++bucket)
    {
        const auto materialist =
          featureTransformer.transform(pos, accumulatorStack, cache, transformedFeatures, bucket);
        const auto positional = network[bucket].propagate(transformedFeatures);

        t.psqt[bucket]       = static_cast(materialist / OutputScale);
        t.positional[bucket] = static_cast(positional / OutputScale);
    }

    return t;
}


// Opens dir + evalfilePath in binary mode and tries to load a net from it.
// Only on success are evalFile.current and evalFile.netDescription updated.
template
void Network::load_user_net(const std::string& dir, const std::string& evalfilePath) {
    std::ifstream stream(dir + evalfilePath, std::ios::binary);
    auto          description = load(stream);

    if (description.has_value())
    {
        evalFile.current        = evalfilePath;
        evalFile.netDescription = description.value();
    }
}


// Loads the net from the data returned by get_embedded(embeddedType).
// On success the current file name becomes the default name.
template
void Network::load_internal() {
    // C++ way to prepare a buffer for a memory stream
    class MemoryBuffer: public std::basic_streambuf {
       public:
        MemoryBuffer(char* p, size_t n) {
            setg(p, p, p + n);
            setp(p, p + n);
        }
    };

    const auto embedded = get_embedded(embeddedType);

    // The embedded bytes are const; the casts only adapt them to the char*
    // interface of the stream buffer
    MemoryBuffer buffer(const_cast(reinterpret_cast(embedded.data)),
                        size_t(embedded.size));

    std::istream stream(&buffer);
    auto         description = load(stream);

    if (description.has_value())
    {
        evalFile.current        = evalFile.defaultName;
        evalFile.netDescription = description.value();
    }
}


// Marks the network as initialized (get_content_hash() returns 0 until then)
template
void Network::initialize() {
    initialized = true;
}


// Writes the net to `stream`. Refuses (returns false) when `name` is empty or
// equal to "None"; otherwise returns the result of write_parameters().
template
bool Network::save(std::ostream&      stream,
                   const std::string& name,
                   const std::string& netDescription) const {
    if (name.empty() || name == "None")
        return false;

    return write_parameters(stream, netDescription);
}


// Reads a net from `stream`. Returns the description stored in the file on
// success and std::nullopt on failure. The network is marked initialized
// before reading, whatever the outcome.
template
std::optional Network::load(std::istream& stream) {
    initialize();
    std::string description;

    return read_parameters(stream, description) ? std::make_optional(description) : std::nullopt;
}


// Hash over the network content: 0 while not initialized, otherwise the
// combination of the feature transformer, every layer stack, evalFile and
// the embedded type.
template
std::size_t Network::get_content_hash() const {
    if (!initialized)
        return 0;

    std::size_t h = 0;
    hash_combine(h, featureTransformer);
    for (auto&& layerstack : network)
        hash_combine(h, layerstack);
    hash_combine(h, evalFile);
    hash_combine(h, static_cast(embeddedType));
    return h;
}

// Read network header
// Reads three little-endian values (version, hash value, description size),
// fails on a stream error or a version other than Version, then reads `size`
// bytes into *desc. Returns false if any read failed.
template
bool Network::read_header(std::istream&  stream,
                          std::uint32_t* hashValue,
                          std::string*   desc) const {
    std::uint32_t version, size;

    version    = read_little_endian(stream);
    *hashValue = read_little_endian(stream);
    size       = read_little_endian(stream);
    if (!stream || version != Version)
        return false;
    desc->resize(size);
    stream.read(&(*desc)[0], size);
    return !stream.fail();
}


// Write network header
// Counterpart of read_header(): writes Version, hashValue, the description
// size and then the description bytes. Returns false if the stream failed.
template
bool Network::write_header(std::ostream&      stream,
                           std::uint32_t      hashValue,
                           const std::string& desc) const {
    write_little_endian(stream, Version);
    write_little_endian(stream, hashValue);
    write_little_endian(stream, std::uint32_t(desc.size()));
    stream.write(&desc[0], desc.size());
    return !stream.fail();
}


// Reads a complete net: header, hash check against Network::hash, feature
// transformer, then every layer stack. Succeeds only if all of that worked
// and the stream is exactly at end-of-file afterwards (no trailing bytes).
template
bool Network::read_parameters(std::istream& stream, std::string& netDescription) {
    std::uint32_t hashValue;
    if (!read_header(stream, &hashValue, &netDescription))
        return false;
    if (hashValue != Network::hash)
        return false;
    if (!Detail::read_parameters(stream, featureTransformer))
        return false;
    for (std::size_t i = 0; i < LayerStacks; ++i)
    {
        if (!Detail::read_parameters(stream, network[i]))
            return false;
    }
    return stream && stream.peek() == std::ios::traits_type::eof();
}


// Writes a complete net in the order read_parameters() expects: header,
// feature transformer, then every layer stack. Returns the stream state.
template
bool Network::write_parameters(std::ostream& stream, const std::string& netDescription) const {
    if (!write_header(stream, Network::hash, netDescription))
        return false;
    if (!Detail::write_parameters(stream, featureTransformer))
        return false;
    for (std::size_t i = 0; i < LayerStacks; ++i)
    {
        if (!Detail::write_parameters(stream, network[i]))
            return false;
    }
    return bool(stream);
}

// Explicit template instantiations

template class Network, FeatureTransformer>;

template class Network, FeatureTransformer>;

}  // namespace Stockfish::Eval::NNUE

================================================
FILE: src/nnue/network.h
================================================
/*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
  Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.

  Stockfish is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

#ifndef NETWORK_H_INCLUDED
#define NETWORK_H_INCLUDED

#include
#include
#include
#include
#include
#include
#include
#include
#include

#include "../misc.h"
#include "../types.h"
#include "nnue_accumulator.h"
#include "nnue_architecture.h"
#include "nnue_common.h"
#include "nnue_feature_transformer.h"
#include "nnue_misc.h"

namespace Stockfish {
class Position;
}

namespace Stockfish::Eval::NNUE {

// Selects which of the two embedded nets a Network instance falls back to
enum class EmbeddedNNUEType {
    BIG,
    SMALL,
};

// Result of Network::evaluate(): the psqt term followed by the positional term
using NetworkOutput = std::tuple;

// The network must be a trivial type, i.e. the memory must be in-line.
// This is required to allow sharing the network via shared memory, as
// there is no way to run destructors.
// One NNUE network: a feature transformer, LayerStacks copies of the layer
// architecture, and the bookkeeping about which file it was loaded from.
// NOTE(review): template parameter/argument lists are missing in this copy
// (e.g. the bare "template" below, "std::optional&", "std::function&").
// Presumably text in angle brackets was lost - confirm against upstream.
template
class Network {
    static constexpr IndexType FTDimensions = Arch::TransformedFeatureDimensions;

   public:
    // Stores the file information and embedded-net type; no net is loaded here
    Network(EvalFile file, EmbeddedNNUEType type) :
        evalFile(file),
        embeddedType(type) {}

    Network(const Network& other) = default;
    Network(Network&& other)      = default;

    Network& operator=(const Network& other) = default;
    Network& operator=(Network&& other)      = default;

    // Load the net named evalfilePath, searching rootDirectory among others
    void load(const std::string& rootDirectory, std::string evalfilePath);
    // Save the current net to the given file, or to the default file name
    bool save(const std::optional& filename) const;

    std::size_t get_content_hash() const;

    // Evaluate a position; returns the psqt and positional terms
    NetworkOutput evaluate(const Position&                         pos,
                           AccumulatorStack&                       accumulatorStack,
                           AccumulatorCaches::Cache& cache) const;


    // Check that the requested net is the one loaded; reports through the callback
    void verify(std::string evalfilePath, const std::function&) const;
    // Evaluate a position with every layer stack, for tracing purposes
    NnueEvalTrace trace_evaluate(const Position&                         pos,
                                 AccumulatorStack&                       accumulatorStack,
                                 AccumulatorCaches::Cache& cache) const;

   private:
    void load_user_net(const std::string&, const std::string&);
    void load_internal();

    void initialize();

    // Stream-level counterparts of the public save()/load()
    bool          save(std::ostream&, const std::string&, const std::string&) const;
    std::optional load(std::istream&);

    bool read_header(std::istream&, std::uint32_t*, std::string*) const;
    bool write_header(std::ostream&, std::uint32_t, const std::string&) const;

    bool read_parameters(std::istream&, std::string&);
    bool write_parameters(std::ostream&, const std::string&) const;

    // Input feature converter
    Transformer featureTransformer;

    // Evaluation function
    Arch network[LayerStacks];

    // File information (default name, currently loaded name, description)
    EvalFile         evalFile;
    EmbeddedNNUEType embeddedType;

    // Set by initialize(); starts out false
    bool initialized = false;

    // Hash value of evaluation function structure
    static constexpr std::uint32_t hash = Transformer::get_hash_value() ^ Arch::get_hash_value();

    template
    friend struct AccumulatorCaches::Cache;
};

// Definitions of the network types
using SmallFeatureTransformer  = FeatureTransformer;
using SmallNetworkArchitecture = NetworkArchitecture;

using BigFeatureTransformer  = FeatureTransformer;
using BigNetworkArchitecture = NetworkArchitecture;

using NetworkBig   = Network;
using NetworkSmall = Network;
// Pair of networks used together: one big and one small net, each constructed
// from its own EvalFile and tagged with the matching embedded-net type.
struct Networks {
    Networks(EvalFile bigFile, EvalFile smallFile) :
        big(bigFile, EmbeddedNNUEType::BIG),
        small(smallFile, EmbeddedNNUEType::SMALL) {}

    NetworkBig   big;
    NetworkSmall small;
};


}  // namespace Stockfish::Eval::NNUE

// NOTE(review): the template parameter/argument lists of the first
// specialization below are incomplete in this copy ("template struct
// std::hash> {"). Presumably text in angle brackets was lost - confirm
// against upstream.

// std::hash for a single Network: forwards to Network::get_content_hash()
template
struct std::hash> {
    std::size_t operator()(
      const Stockfish::Eval::NNUE::Network& network) const noexcept {
        return network.get_content_hash();
    }
};

// std::hash for Networks: combines the hashes of the big and the small net
template<>
struct std::hash {
    std::size_t operator()(const Stockfish::Eval::NNUE::Networks& networks) const noexcept {
        std::size_t h = 0;
        Stockfish::hash_combine(h, networks.big);
        Stockfish::hash_combine(h, networks.small);
        return h;
    }
};

#endif

================================================
FILE: src/nnue/nnue_accumulator.cpp
================================================
/*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
  Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.

  Stockfish is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

#include "nnue_accumulator.h"

// NOTE(review): the four #include directives below carry no header name, and
// template parameter/argument lists are missing throughout this file (bare
// "template", "acc()", "static_cast(...)", "std::is_same_v", ...). Presumably
// text in angle brackets was lost from this copy - confirm against the
// upstream file before compiling. Where two calls or instantiations look
// identical below, they presumably differed in those lost arguments.
#include
#include
#include
#include

#include "../bitboard.h"
#include "../misc.h"
#include "../position.h"
#include "../types.h"
#include "features/half_ka_v2_hm.h"
#include "nnue_architecture.h"
#include "nnue_common.h"
#include "nnue_feature_transformer.h"  // IWYU pragma: keep
#include "simd.h"

namespace Stockfish::Eval::NNUE {

using namespace SIMD;

namespace {

// Forward declarations of the update helpers used by AccumulatorStack below

template
void double_inc_update(Color                                                   perspective,
                       const FeatureTransformer& featureTransformer,
                       const Square                                            ksq,
                       AccumulatorState&                        middle_state,
                       AccumulatorState&                        target_state,
                       const AccumulatorState&                  computed);

template
void double_inc_update(Color                                                   perspective,
                       const FeatureTransformer& featureTransformer,
                       const Square                                            ksq,
                       AccumulatorState&                     middle_state,
                       AccumulatorState&                     target_state,
                       const AccumulatorState&               computed,
                       const DirtyPiece&                                       dp2);

template
void update_accumulator_incremental(
  Color                                                   perspective,
  const FeatureTransformer& featureTransformer,
  const Square                                            ksq,
  AccumulatorState&                           target_state,
  const AccumulatorState&                     computed);

template
void update_accumulator_refresh_cache(Color                                 perspective,
                                      const FeatureTransformer& featureTransformer,
                                      const Position&                       pos,
                                      AccumulatorState&      accumulatorState,
                                      AccumulatorCaches::Cache& cache);

template
void update_threats_accumulator_full(Color                                 perspective,
                                     const FeatureTransformer& featureTransformer,
                                     const Position&                       pos,
                                     AccumulatorState&   accumulatorState);
}

// Read-only access to the newest state on the stack (index size - 1)
template
const AccumulatorState& AccumulatorStack::latest() const noexcept {
    return accumulators()[size - 1];
}

// Explicit template instantiations
template const AccumulatorState& AccumulatorStack::latest() const noexcept;
template const AccumulatorState& AccumulatorStack::latest() const noexcept;

// Mutable access to the newest state on the stack (index size - 1)
template
AccumulatorState& AccumulatorStack::mut_latest() noexcept {
    return mut_accumulators()[size - 1];
}

// Read-only access to the array of states for the requested feature set:
// psq_accumulators or threat_accumulators, selected at compile time
template
const std::array, AccumulatorStack::MaxSize>&
AccumulatorStack::accumulators() const noexcept {
    static_assert(std::is_same_v || std::is_same_v,
                  "Invalid Feature Set Type");

    if constexpr (std::is_same_v)
        return psq_accumulators;

    if constexpr (std::is_same_v)
        return threat_accumulators;
}

// Mutable counterpart of accumulators()
template
std::array, AccumulatorStack::MaxSize>&
AccumulatorStack::mut_accumulators() noexcept {
    static_assert(std::is_same_v || std::is_same_v,
                  "Invalid Feature Set Type");

    if constexpr (std::is_same_v)
        return psq_accumulators;

    if constexpr (std::is_same_v)
        return threat_accumulators;
}

// Resets the bottom entry of both arrays and shrinks the stack to that entry
void AccumulatorStack::reset() noexcept {
    psq_accumulators[0].reset({});
    threat_accumulators[0].reset({});
    size = 1;
}

// Adds one entry on top of the stack. The new entry of each array is reset and
// the references returned by those resets are handed back to the caller.
std::pair AccumulatorStack::push() noexcept {
    assert(size < MaxSize);
    auto& dp  = psq_accumulators[size].reset();
    auto& dts = threat_accumulators[size].reset();
    // Construct a fresh DirtyThreats in place of the previous content
    new (&dts) DirtyThreats;
    size++;
    return {dp, dts};
}

// Drops the top entry; the bottom entry is never popped
void AccumulatorStack::pop() noexcept {
    assert(size > 1);
    size--;
}

// Brings the newest accumulator state up to date for both colors.
// When Dimensions equals TransformedFeatureDimensionsBig a second
// evaluate_side() call is made per color (UseThreats).
template
void AccumulatorStack::evaluate(const Position&                       pos,
                                const FeatureTransformer& featureTransformer,
                                AccumulatorCaches::Cache& cache) noexcept {
    constexpr bool UseThreats = (Dimensions == TransformedFeatureDimensionsBig);

    evaluate_side(WHITE, pos, featureTransformer, cache);

    if (UseThreats)
        evaluate_side(WHITE, pos, featureTransformer, cache);

    evaluate_side(BLACK, pos, featureTransformer, cache);

    if (UseThreats)
        evaluate_side(BLACK, pos, featureTransformer, cache);
}

// Makes the newest state computed for one perspective.
// If the state found by find_last_usable_accumulator() is already computed,
// the states after it are updated incrementally going forward. Otherwise the
// newest state is computed from scratch (via the refresh cache or the full
// threats update, depending on the feature set) and the older states down to
// the found index are then updated going backward.
template
void AccumulatorStack::evaluate_side(Color                                 perspective,
                                     const Position&                       pos,
                                     const FeatureTransformer& featureTransformer,
                                     AccumulatorCaches::Cache& cache) noexcept {

    const auto last_usable_accum = find_last_usable_accumulator(perspective);

    if ((accumulators()[last_usable_accum].template acc())
          .computed[perspective])
        forward_update_incremental(perspective, pos, featureTransformer, last_usable_accum);

    else
    {
        if constexpr (std::is_same_v)
            update_accumulator_refresh_cache(perspective, featureTransformer, pos,
                                             mut_latest(), cache);
        else
            update_threats_accumulator_full(perspective, featureTransformer, pos,
                                            mut_latest());

        backward_update_incremental(perspective, pos, featureTransformer,
                                    last_usable_accum);
    }
}

// Walks the stack from the newest state towards the oldest and returns the
// index of the first state that is either already computed for `perspective`
// or whose diff requires a full refresh according to
// FeatureSet::requires_refresh. Returns 0 when no such state is found above
// the bottom entry.
template
std::size_t AccumulatorStack::find_last_usable_accumulator(Color perspective) const noexcept {

    for (std::size_t curr_idx = size - 1; curr_idx > 0; curr_idx--)
    {
        if ((accumulators()[curr_idx].template acc()).computed[perspective])
            return curr_idx;

        if (FeatureSet::requires_refresh(accumulators()[curr_idx].diff, perspective))
            return curr_idx;
    }

    return 0;
}

// Starting from the computed state at index `begin`, updates every newer state
// up to the top of the stack. Where the conditions below allow it, two
// consecutive moves are applied in one step with double_inc_update(); in that
// case the state in between is skipped and is not marked computed.
template
void AccumulatorStack::forward_update_incremental(
  Color                                 perspective,
  const Position&                       pos,
  const FeatureTransformer& featureTransformer,
  const std::size_t                     begin) noexcept {

    assert(begin < accumulators().size());
    assert((accumulators()[begin].template acc()).computed[perspective]);

    const Square ksq = pos.square(perspective);

    for (std::size_t next = begin + 1; next < size; next++)
    {
        // A fused update needs a second state after `next`
        if (next + 1 < size)
        {
            DirtyPiece& dp1 = mut_accumulators()[next].diff;
            DirtyPiece& dp2 = mut_accumulators()[next + 1].diff;

            auto& accumulators = mut_accumulators();

            if constexpr (std::is_same_v)
            {
                // Fuse when the square vacated by the second move is among the
                // threatening squares recorded for the first one
                if (dp2.remove_sq != SQ_NONE
                    && (accumulators[next].diff.threateningSqs & square_bb(dp2.remove_sq)))
                {
                    double_inc_update(perspective, featureTransformer, ksq, accumulators[next],
                                      accumulators[next + 1], accumulators[next - 1], dp2);
                    next++;
                    continue;
                }
            }

            if constexpr (std::is_same_v)
            {
                // Fuse when the second move removes the piece on the square the
                // first move went to
                if (dp1.to != SQ_NONE && dp1.to == dp2.remove_sq)
                {
                    // Both fields are set to SQ_NONE for the duration of the
                    // fused update and restored right afterwards
                    const Square captureSq = dp1.to;
                    dp1.to = dp2.remove_sq = SQ_NONE;
                    double_inc_update(perspective, featureTransformer, ksq, accumulators[next],
                                      accumulators[next + 1], accumulators[next - 1]);
                    dp1.to = dp2.remove_sq = captureSq;
                    next++;
                    continue;
                }
            }
        }

        update_accumulator_incremental(perspective, featureTransformer, ksq,
                                       mut_accumulators()[next],
                                       accumulators()[next - 1]);
    }

    assert((latest().acc()).computed[perspective]);
}

// Starting from the computed newest state, updates the older states one by one
// down to and including index `end`, each from the state directly above it.
template
void AccumulatorStack::backward_update_incremental(
  Color perspective,

  const Position&                       pos,
  const FeatureTransformer& featureTransformer,
  const std::size_t                     end) noexcept {

    assert(end < accumulators().size());
    assert(end < size);
    assert((latest().template acc()).computed[perspective]);

    const Square ksq = pos.square(perspective);

    // Signed index: with end == 0 the loop has to be able to step below zero
    for (std::int64_t next = std::int64_t(size) - 2; next >= std::int64_t(end); next--)
        update_accumulator_incremental(perspective, featureTransformer, ksq,
                                       mut_accumulators()[next],
                                       accumulators()[next + 1]);

    assert((accumulators()[end].template acc()).computed[perspective]);
}

// Explicit template instantiations
template void AccumulatorStack::evaluate(
  const Position&                       pos,
  const FeatureTransformer& featureTransformer,
  AccumulatorCaches::Cache& cache) noexcept;
template void AccumulatorStack::evaluate(
  const Position&                       pos,
  const FeatureTransformer& featureTransformer,
  AccumulatorCaches::Cache& cache) noexcept;


namespace {

// Vector-wise row combination: for every SIMD vector position i, stores
// fused(in[i], rows[i]...) into out[i]. `size` is the number of vectors that
// cover Width elements of ElementType.
template, bool> = true>
void fused_row_reduce(const ElementType* in, ElementType* out, const Ts* const... rows) {
    constexpr IndexType size = Width * sizeof(ElementType) / sizeof(typename VectorWrapper::type);

    auto* vecIn  = reinterpret_cast(in);
    auto* vecOut = reinterpret_cast(out);

    for (IndexType i = 0; i < size; ++i)
        vecOut[i] = fused(
          vecIn[i], reinterpret_cast(rows)[i]...);
}

// Bundles everything one accumulator update needs: the perspective, the
// feature transformer holding the weights, the already computed source state
// (`from`) and the state to be written (`to`).
template
struct AccumulatorUpdateContext {
    Color                                 perspective;
    const FeatureTransformer& featureTransformer;
    const AccumulatorState&   from;
    AccumulatorState&         to;

    AccumulatorUpdateContext(Color                                 persp,
                             const FeatureTransformer& ft,
                             const AccumulatorState&   accF,
                             AccumulatorState&         accT) noexcept :
        perspective{persp},
        featureTransformer{ft},
        from{accF},
        to{accT} {}

    // Update from a fixed number of feature indices: looks up the weight row
    // and the psqt weight row of each index and combines them with the source
    // accumulation / psqtAccumulation in a single fused pass each.
    template, bool> = true>
    void apply(const Ts... indices) {
        auto to_weight_vector = [&](const IndexType index) {
            return &featureTransformer.weights[index * Dimensions];
        };

        auto to_psqt_weight_vector = [&](const IndexType index) {
            return &featureTransformer.psqtWeights[index * PSQTBuckets];
        };

        fused_row_reduce(
          (from.template acc()).accumulation[perspective].data(),
          (to.template acc()).accumulation[perspective].data(),
          to_weight_vector(indices)...);

        fused_row_reduce(
          (from.template acc()).psqtAccumulation[perspective].data(),
          (to.template acc()).psqtAccumulation[perspective].data(),
          to_psqt_weight_vector(indices)...);
    }

    // Update from index lists of arbitrary length, using threatWeights and
    // threatPsqtWeights: the rows of `removed` are subtracted from and the
    // rows of `added` are added to the source values, and the result is
    // written to the target state.
    void apply(const typename FeatureSet::IndexList& added,
               const typename FeatureSet::IndexList& removed) {
        const auto& fromAcc = from.template acc().accumulation[perspective];
        auto&       toAcc   = to.template acc().accumulation[perspective];

        const auto& fromPsqtAcc = from.template acc().psqtAccumulation[perspective];
        auto&       toPsqtAcc   = to.template acc().psqtAccumulation[perspective];

#ifdef VECTOR
        using Tiling = SIMDTiling;
        vec_t      acc[Tiling::NumRegs];
        psqt_vec_t psqt[Tiling::NumPsqtRegs];

        const auto* threatWeights = &featureTransformer.threatWeights[0];

        // Process the accumulator tile by tile: load a tile into acc[], apply
        // all removals and additions to it, then store it to the target
        for (IndexType j = 0; j < Dimensions / Tiling::TileHeight; ++j)
        {
            auto* fromTile = reinterpret_cast(&fromAcc[j * Tiling::TileHeight]);
            auto* toTile   = reinterpret_cast(&toAcc[j * Tiling::TileHeight]);

            for (IndexType k = 0; k < Tiling::NumRegs; ++k)
                acc[k] = fromTile[k];

            for (int i = 0; i < removed.ssize(); ++i)
            {
                size_t       index  = removed[i];
                const size_t offset = Dimensions * index;
                auto*        column = reinterpret_cast(&threatWeights[offset]);

#ifdef USE_NEON
                // Each column vector is widened from 8-bit to 16-bit in two
                // halves, filling two accumulator registers
                for (IndexType k = 0; k < Tiling::NumRegs; k += 2)
                {
                    acc[k]     = vec_sub_16(acc[k], vmovl_s8(vget_low_s8(column[k / 2])));
                    acc[k + 1] = vec_sub_16(acc[k + 1], vmovl_high_s8(column[k / 2]));
                }
#else
                for (IndexType k = 0; k < Tiling::NumRegs; ++k)
                    acc[k] = vec_sub_16(acc[k], vec_convert_8_16(column[k]));
#endif
            }

            for (int i = 0; i < added.ssize(); ++i)
            {
                size_t       index  = added[i];
                const size_t offset = Dimensions * index;
                auto*        column = reinterpret_cast(&threatWeights[offset]);

#ifdef USE_NEON
                for (IndexType k = 0; k < Tiling::NumRegs; k += 2)
                {
                    acc[k]     = vec_add_16(acc[k], vmovl_s8(vget_low_s8(column[k / 2])));
                    acc[k + 1] = vec_add_16(acc[k + 1], vmovl_high_s8(column[k / 2]));
                }
#else
                for (IndexType k = 0; k < Tiling::NumRegs; ++k)
                    acc[k] = vec_add_16(acc[k], vec_convert_8_16(column[k]));
#endif
            }

            for (IndexType k = 0; k < Tiling::NumRegs; k++)
                vec_store(&toTile[k], acc[k]);

            // Advance the base pointer so that the same per-index offset
            // addresses the next tile of each row
            threatWeights += Tiling::TileHeight;
        }

        // Same scheme for the psqt part
        for (IndexType j = 0; j < PSQTBuckets / Tiling::PsqtTileHeight; ++j)
        {
            auto* fromTilePsqt =
              reinterpret_cast(&fromPsqtAcc[j * Tiling::PsqtTileHeight]);
            auto* toTilePsqt = reinterpret_cast(&toPsqtAcc[j * Tiling::PsqtTileHeight]);

            for (IndexType k = 0; k < Tiling::NumPsqtRegs; ++k)
                psqt[k] = fromTilePsqt[k];

            for (int i = 0; i < removed.ssize(); ++i)
            {
                size_t       index      = removed[i];
                const size_t offset     = PSQTBuckets * index + j * Tiling::PsqtTileHeight;
                auto*        columnPsqt = reinterpret_cast(
                  &featureTransformer.threatPsqtWeights[offset]);

                for (std::size_t k = 0; k < Tiling::NumPsqtRegs; ++k)
                    psqt[k] = vec_sub_psqt_32(psqt[k], columnPsqt[k]);
            }

            for (int i = 0; i < added.ssize(); ++i)
            {
                size_t       index      = added[i];
                const size_t offset     = PSQTBuckets * index + j * Tiling::PsqtTileHeight;
                auto*        columnPsqt = reinterpret_cast(
                  &featureTransformer.threatPsqtWeights[offset]);

                for (std::size_t k = 0; k < Tiling::NumPsqtRegs; ++k)
                    psqt[k] = vec_add_psqt_32(psqt[k], columnPsqt[k]);
            }

            for (IndexType k = 0; k < Tiling::NumPsqtRegs; ++k)
                vec_store_psqt(&toTilePsqt[k], psqt[k]);
        }

#else

        // Scalar fallback: copy the source values, then apply every removed
        // and added row element by element
        toAcc     = fromAcc;
        toPsqtAcc = fromPsqtAcc;

        for (const auto index : removed)
        {
            const IndexType offset = Dimensions * index;
            for (IndexType j = 0; j < Dimensions; ++j)
                toAcc[j] -= featureTransformer.threatWeights[offset + j];

            for (std::size_t k = 0; k < PSQTBuckets; ++k)
                toPsqtAcc[k] -= featureTransformer.threatPsqtWeights[index * PSQTBuckets + k];
        }

        for (const auto index : added)
        {
            const IndexType offset = Dimensions * index;
            for (IndexType j = 0; j < Dimensions; ++j)
                toAcc[j] += featureTransformer.threatWeights[offset + j];

            for (std::size_t k = 0; k < PSQTBuckets; ++k)
                toPsqtAcc[k] += featureTransformer.threatPsqtWeights[index * PSQTBuckets + k];
        }

#endif
    }
};

// Convenience factory for AccumulatorUpdateContext
template
auto make_accumulator_update_context(Color                                 perspective,
                                     const FeatureTransformer& featureTransformer,
                                     const AccumulatorState&   accumulatorFrom,
                                     AccumulatorState&         accumulatorTo) noexcept {
    return AccumulatorUpdateContext{
      perspective, featureTransformer, accumulatorFrom, accumulatorTo};
}

// Applies two consecutive moves in one step (PSQ feature set).
// The index changes of middle_state.diff and target_state.diff are collected
// into the same lists and applied to `computed`, writing target_state
// directly. Only target_state is marked computed; middle_state is left as is.
template
void double_inc_update(Color                                                   perspective,
                       const FeatureTransformer& featureTransformer,
                       const Square                                            ksq,
                       AccumulatorState&                        middle_state,
                       AccumulatorState&                        target_state,
                       const AccumulatorState&                  computed) {

    assert(computed.acc().computed[perspective]);
    assert(!middle_state.acc().computed[perspective]);
    assert(!target_state.acc().computed[perspective]);

    PSQFeatureSet::IndexList removed, added;
    PSQFeatureSet::append_changed_indices(perspective, ksq, middle_state.diff, removed, added);
    // you can't capture a piece that was just involved in castling since the rook ends up
    // in a square that the king passed
    assert(added.size() < 2);
    PSQFeatureSet::append_changed_indices(perspective, ksq, target_state.diff, removed, added);

    [[maybe_unused]] const int addedSize   = added.ssize();
    [[maybe_unused]] const int removedSize = removed.ssize();

    assert(addedSize == 1);
    assert(removedSize == 2 || removedSize == 3);

    // Workaround compiler warning for uninitialized variables, replicated on
    // profile builds on windows with gcc 14.2.0.
    // Also helps with optimizations on some compilers.

    sf_assume(addedSize == 1);
    sf_assume(removedSize == 2 || removedSize == 3);

    auto updateContext =
      make_accumulator_update_context(perspective, featureTransformer, computed, target_state);

    if (removedSize == 2)
    {
        updateContext.template apply(added[0], removed[0], removed[1]);
    }
    else
    {
        updateContext.template apply(added[0], removed[0], removed[1],
                                                    removed[2]);
    }

    target_state.acc().computed[perspective] = true;
}

// Applies two consecutive moves in one step (threat feature set).
// Both diffs are collected into the same index lists, sharing fusedData
// (seeded with dp2.remove_sq) between the two append_changed_indices calls,
// and the lists are applied to `computed`, writing target_state directly.
// Only target_state is marked computed.
template
void double_inc_update(Color                                                   perspective,
                       const FeatureTransformer& featureTransformer,
                       const Square                                            ksq,
                       AccumulatorState&                     middle_state,
                       AccumulatorState&                     target_state,
                       const AccumulatorState&               computed,
                       const DirtyPiece&                                       dp2) {

    assert(computed.acc().computed[perspective]);
    assert(!middle_state.acc().computed[perspective]);
    assert(!target_state.acc().computed[perspective]);

    ThreatFeatureSet::FusedUpdateData fusedData;

    fusedData.dp2removed = dp2.remove_sq;

    ThreatFeatureSet::IndexList removed, added;
    // Base address and row stride of the threat weights, passed through to
    // append_changed_indices
    const auto*                 pfBase   = &featureTransformer.threatWeights[0];
    auto                        pfStride = static_cast(TransformedFeatureDimensions);
    ThreatFeatureSet::append_changed_indices(perspective, ksq, middle_state.diff, removed, added,
                                             &fusedData, true, pfBase, pfStride);
    ThreatFeatureSet::append_changed_indices(perspective, ksq, target_state.diff, removed, added,
                                             &fusedData, false, pfBase, pfStride);

    auto updateContext =
      make_accumulator_update_context(perspective, featureTransformer, computed, target_state);

    updateContext.apply(added, removed);

    target_state.acc().computed[perspective] = true;
}

// Computes target_state from the neighbouring state `computed` for one move.
// With Forward, target_state.diff is applied as is. Otherwise computed.diff is
// used with the roles of the added and removed lists swapped, which undoes
// that move. target_state is marked computed at the end.
template
void update_accumulator_incremental(
  Color                                                   perspective,
  const FeatureTransformer& featureTransformer,
  const Square                                            ksq,
  AccumulatorState&                           target_state,
  const AccumulatorState&                     computed) {

    assert((computed.template acc()).computed[perspective]);
    assert(!(target_state.template acc()).computed[perspective]);

    // The size must be enough to contain the largest possible update.
    // That might depend on the feature set and generally relies on the
    // feature set's update cost calculation to be correct and never allow
    // updates with more added/removed features than MaxActiveDimensions.
    // In this case, the maximum size of both feature addition and removal
    // is 2, since we are incrementally updating one move at a time.
    typename FeatureSet::IndexList removed, added;
    if constexpr (std::is_same_v)
    {
        const auto* pfBase   = &featureTransformer.threatWeights[0];
        auto        pfStride = static_cast(TransformedFeatureDimensions);
        if constexpr (Forward)
            FeatureSet::append_changed_indices(perspective, ksq, target_state.diff, removed, added,
                                               nullptr, false, pfBase, pfStride);
        else
            FeatureSet::append_changed_indices(perspective, ksq, computed.diff, added, removed,
                                               nullptr, false, pfBase, pfStride);
    }
    else
    {
        if constexpr (Forward)
            FeatureSet::append_changed_indices(perspective, ksq, target_state.diff, removed, added);
        else
            FeatureSet::append_changed_indices(perspective, ksq, computed.diff, added, removed);
    }

    auto updateContext =
      make_accumulator_update_context(perspective, featureTransformer, computed, target_state);

    if constexpr (std::is_same_v)
        updateContext.apply(added, removed);
    else
    {
        [[maybe_unused]] const int addedSize   = added.ssize();
        [[maybe_unused]] const int removedSize = removed.ssize();

        assert(addedSize == 1 || addedSize == 2);
        assert(removedSize == 1 || removedSize == 2);
        assert((Forward && addedSize <= removedSize) || (!Forward && addedSize >= removedSize));

        // Workaround compiler warning for uninitialized variables, replicated
        // on profile builds on windows with gcc 14.2.0.
        // Also helps with optimizations on some compilers.

        sf_assume(addedSize == 1 || addedSize == 2);
        sf_assume(removedSize == 1 || removedSize == 2);

        if (!(removedSize == 1 || removedSize == 2) || !(addedSize == 1 || addedSize == 2))
            sf_unreachable();

        // Pick the fused update that matches the number of added/removed rows
        if ((Forward && removedSize == 1) || (!Forward && addedSize == 1))
        {
            assert(addedSize == 1 && removedSize == 1);
            updateContext.template apply(added[0], removed[0]);
        }
        else if (Forward && addedSize == 1)
        {
            assert(removedSize == 2);
            updateContext.template apply(added[0], removed[0], removed[1]);
        }
        else if (!Forward && removedSize == 1)
        {
            assert(addedSize == 2);
            updateContext.template apply(added[0], added[1], removed[0]);
        }
        else
        {
            assert(addedSize == 2 && removedSize == 2);
            updateContext.template apply(added[0], added[1], removed[0],
                                                    removed[1]);
        }
    }

    (target_state.template acc()).computed[perspective] = true;
}

Bitboard get_changed_pieces(const std::array& oldPieces,
                            const std::array& newPieces) {
#if defined(USE_AVX512) || defined(USE_AVX2)
    static_assert(sizeof(Piece) == 1);
    Bitboard sameBB = 0;

    for (int i = 0; i < 64; i += 32)
    {
        const __m256i old_v = _mm256_loadu_si256(reinterpret_cast(&oldPieces[i]));
        const __m256i new_v = _mm256_loadu_si256(reinterpret_cast(&newPieces[i]));
        const __m256i cmpEqual        = _mm256_cmpeq_epi8(old_v, new_v);
        const std::uint32_t equalMask = _mm256_movemask_epi8(cmpEqual);
        sameBB |= static_cast(equalMask) << i;
    }
    return ~sameBB;
#elif defined(USE_NEON)
    uint8x16x4_t old_v = vld4q_u8(reinterpret_cast(oldPieces.data()));
    uint8x16x4_t new_v = vld4q_u8(reinterpret_cast(newPieces.data()));
    auto         cmp   = [=](const int i) { return vceqq_u8(old_v.val[i], new_v.val[i]); };

    uint8x16_t cmp0_1 = vsriq_n_u8(cmp(1), cmp(0), 1);
    uint8x16_t cmp2_3 = vsriq_n_u8(cmp(3), cmp(2), 1);
    uint8x16_t merged = vsriq_n_u8(cmp2_3, cmp0_1, 2);
    merged            = vsriq_n_u8(merged, merged, 4);
    uint8x8_t sameBB  = vshrn_n_u16(vreinterpretq_u16_u8(merged), 4);

    return ~vget_lane_u64(vreinterpret_u64_u8(sameBB), 0);
#else
    Bitboard changed = 0;

    for (Square sq = SQUARE_ZERO; sq < SQUARE_NB; ++sq)
changed |= static_cast(oldPieces[sq] != newPieces[sq]) << sq; return changed; #endif } template void update_accumulator_refresh_cache(Color perspective, const FeatureTransformer& featureTransformer, const Position& pos, AccumulatorState& accumulatorState, AccumulatorCaches::Cache& cache) { using Tiling [[maybe_unused]] = SIMDTiling; const Square ksq = pos.square(perspective); auto& entry = cache[ksq][perspective]; PSQFeatureSet::IndexList removed, added; const Bitboard changedBB = get_changed_pieces(entry.pieces, pos.piece_array()); Bitboard removedBB = changedBB & entry.pieceBB; Bitboard addedBB = changedBB & pos.pieces(); #if defined(USE_AVX512ICL) PSQFeatureSet::write_indices(entry.pieces, pos.piece_array(), removedBB, addedBB, perspective, ksq, removed, added); #else while (removedBB) { Square sq = pop_lsb(removedBB); removed.push_back(PSQFeatureSet::make_index(perspective, sq, entry.pieces[sq], ksq)); } while (addedBB) { Square sq = pop_lsb(addedBB); added.push_back(PSQFeatureSet::make_index(perspective, sq, pos.piece_on(sq), ksq)); } #endif entry.pieceBB = pos.pieces(); entry.pieces = pos.piece_array(); auto& accumulator = accumulatorState.acc(); accumulator.computed[perspective] = true; #ifdef VECTOR vec_t acc[Tiling::NumRegs]; psqt_vec_t psqt[Tiling::NumPsqtRegs]; const auto* weights = &featureTransformer.weights[0]; for (IndexType j = 0; j < Dimensions / Tiling::TileHeight; ++j) { auto* accTile = reinterpret_cast(&accumulator.accumulation[perspective][j * Tiling::TileHeight]); auto* entryTile = reinterpret_cast(&entry.accumulation[j * Tiling::TileHeight]); for (IndexType k = 0; k < Tiling::NumRegs; ++k) acc[k] = entryTile[k]; int i = 0; for (; i < std::min(removed.ssize(), added.ssize()); ++i) { size_t indexR = removed[i]; const size_t offsetR = Dimensions * indexR; auto* columnR = reinterpret_cast(&weights[offsetR]); size_t indexA = added[i]; const size_t offsetA = Dimensions * indexA; auto* columnA = reinterpret_cast(&weights[offsetA]); for (IndexType k 
= 0; k < Tiling::NumRegs; ++k) acc[k] = fused(acc[k], columnA[k], columnR[k]); } for (; i < removed.ssize(); ++i) { size_t index = removed[i]; const size_t offset = Dimensions * index; auto* column = reinterpret_cast(&weights[offset]); for (IndexType k = 0; k < Tiling::NumRegs; ++k) acc[k] = vec_sub_16(acc[k], column[k]); } for (; i < added.ssize(); ++i) { size_t index = added[i]; const size_t offset = Dimensions * index; auto* column = reinterpret_cast(&weights[offset]); for (IndexType k = 0; k < Tiling::NumRegs; ++k) acc[k] = vec_add_16(acc[k], column[k]); } for (IndexType k = 0; k < Tiling::NumRegs; k++) vec_store(&entryTile[k], acc[k]); for (IndexType k = 0; k < Tiling::NumRegs; k++) vec_store(&accTile[k], acc[k]); weights += Tiling::TileHeight; } for (IndexType j = 0; j < PSQTBuckets / Tiling::PsqtTileHeight; ++j) { auto* accTilePsqt = reinterpret_cast( &accumulator.psqtAccumulation[perspective][j * Tiling::PsqtTileHeight]); auto* entryTilePsqt = reinterpret_cast(&entry.psqtAccumulation[j * Tiling::PsqtTileHeight]); for (IndexType k = 0; k < Tiling::NumPsqtRegs; ++k) psqt[k] = entryTilePsqt[k]; for (int i = 0; i < removed.ssize(); ++i) { size_t index = removed[i]; const size_t offset = PSQTBuckets * index + j * Tiling::PsqtTileHeight; auto* columnPsqt = reinterpret_cast(&featureTransformer.psqtWeights[offset]); for (std::size_t k = 0; k < Tiling::NumPsqtRegs; ++k) psqt[k] = vec_sub_psqt_32(psqt[k], columnPsqt[k]); } for (int i = 0; i < added.ssize(); ++i) { size_t index = added[i]; const size_t offset = PSQTBuckets * index + j * Tiling::PsqtTileHeight; auto* columnPsqt = reinterpret_cast(&featureTransformer.psqtWeights[offset]); for (std::size_t k = 0; k < Tiling::NumPsqtRegs; ++k) psqt[k] = vec_add_psqt_32(psqt[k], columnPsqt[k]); } for (IndexType k = 0; k < Tiling::NumPsqtRegs; ++k) vec_store_psqt(&entryTilePsqt[k], psqt[k]); for (IndexType k = 0; k < Tiling::NumPsqtRegs; ++k) vec_store_psqt(&accTilePsqt[k], psqt[k]); } #else for (const auto index : removed) 
{ const IndexType offset = Dimensions * index; for (IndexType j = 0; j < Dimensions; ++j) entry.accumulation[j] -= featureTransformer.weights[offset + j]; for (std::size_t k = 0; k < PSQTBuckets; ++k) entry.psqtAccumulation[k] -= featureTransformer.psqtWeights[index * PSQTBuckets + k]; } for (const auto index : added) { const IndexType offset = Dimensions * index; for (IndexType j = 0; j < Dimensions; ++j) entry.accumulation[j] += featureTransformer.weights[offset + j]; for (std::size_t k = 0; k < PSQTBuckets; ++k) entry.psqtAccumulation[k] += featureTransformer.psqtWeights[index * PSQTBuckets + k]; } // The accumulator of the refresh entry has been updated. // Now copy its content to the actual accumulator we were refreshing. accumulator.accumulation[perspective] = entry.accumulation; accumulator.psqtAccumulation[perspective] = entry.psqtAccumulation; #endif } template void update_threats_accumulator_full(Color perspective, const FeatureTransformer& featureTransformer, const Position& pos, AccumulatorState& accumulatorState) { using Tiling [[maybe_unused]] = SIMDTiling; ThreatFeatureSet::IndexList active; ThreatFeatureSet::append_active_indices(perspective, pos, active); auto& accumulator = accumulatorState.acc(); accumulator.computed[perspective] = true; #ifdef VECTOR vec_t acc[Tiling::NumRegs]; psqt_vec_t psqt[Tiling::NumPsqtRegs]; const auto* threatWeights = &featureTransformer.threatWeights[0]; for (IndexType j = 0; j < Dimensions / Tiling::TileHeight; ++j) { auto* accTile = reinterpret_cast(&accumulator.accumulation[perspective][j * Tiling::TileHeight]); for (IndexType k = 0; k < Tiling::NumRegs; ++k) acc[k] = vec_zero(); int i = 0; for (; i < active.ssize(); ++i) { size_t index = active[i]; const size_t offset = Dimensions * index; auto* column = reinterpret_cast(&threatWeights[offset]); #ifdef USE_NEON for (IndexType k = 0; k < Tiling::NumRegs; k += 2) { acc[k] = vec_add_16(acc[k], vmovl_s8(vget_low_s8(column[k / 2]))); acc[k + 1] = vec_add_16(acc[k + 1], 
vmovl_high_s8(column[k / 2])); } #else for (IndexType k = 0; k < Tiling::NumRegs; ++k) acc[k] = vec_add_16(acc[k], vec_convert_8_16(column[k])); #endif } for (IndexType k = 0; k < Tiling::NumRegs; k++) vec_store(&accTile[k], acc[k]); threatWeights += Tiling::TileHeight; } for (IndexType j = 0; j < PSQTBuckets / Tiling::PsqtTileHeight; ++j) { auto* accTilePsqt = reinterpret_cast( &accumulator.psqtAccumulation[perspective][j * Tiling::PsqtTileHeight]); for (IndexType k = 0; k < Tiling::NumPsqtRegs; ++k) psqt[k] = vec_zero_psqt(); for (int i = 0; i < active.ssize(); ++i) { size_t index = active[i]; const size_t offset = PSQTBuckets * index + j * Tiling::PsqtTileHeight; auto* columnPsqt = reinterpret_cast(&featureTransformer.threatPsqtWeights[offset]); for (std::size_t k = 0; k < Tiling::NumPsqtRegs; ++k) psqt[k] = vec_add_psqt_32(psqt[k], columnPsqt[k]); } for (IndexType k = 0; k < Tiling::NumPsqtRegs; ++k) vec_store_psqt(&accTilePsqt[k], psqt[k]); } #else for (IndexType j = 0; j < Dimensions; ++j) accumulator.accumulation[perspective][j] = 0; for (std::size_t k = 0; k < PSQTBuckets; ++k) accumulator.psqtAccumulation[perspective][k] = 0; for (const auto index : active) { const IndexType offset = Dimensions * index; for (IndexType j = 0; j < Dimensions; ++j) accumulator.accumulation[perspective][j] += featureTransformer.threatWeights[offset + j]; for (std::size_t k = 0; k < PSQTBuckets; ++k) accumulator.psqtAccumulation[perspective][k] += featureTransformer.threatPsqtWeights[index * PSQTBuckets + k]; } #endif } } } ================================================ FILE: src/nnue/nnue_accumulator.h ================================================ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the 
License, or (at your option) any later version.

Stockfish is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

// Class for difference calculation of NNUE evaluation function

#ifndef NNUE_ACCUMULATOR_H_INCLUDED
#define NNUE_ACCUMULATOR_H_INCLUDED

// NOTE(review): in this copy of the file the system #include directives below
// carry no header names, and the `template` keywords and std::array types
// carry no argument lists (the text between angle brackets appears to be
// missing). Restore them from the upstream source before compiling.
#include
#include
#include
#include
#include

#include "../types.h"
#include "nnue_architecture.h"
#include "nnue_common.h"

namespace Stockfish {
// Forward declaration only; the full class is defined elsewhere.
class Position;
}

namespace Stockfish::Eval::NNUE {

// Forward declarations of the two templates used throughout this header.
template
struct alignas(CacheLineSize) Accumulator;

template
class FeatureTransformer;

// Class that holds the result of affine transformation of input features.
// The struct is aligned to CacheLineSize.
template
struct alignas(CacheLineSize) Accumulator {
    // Outer array has COLOR_NB entries, i.e. one accumulation per color.
    std::array, COLOR_NB> accumulation;
    // Outer array has COLOR_NB entries, i.e. one PSQT accumulation per color.
    std::array, COLOR_NB> psqtAccumulation;
    // Value-initialized by `= {}`; presumably one "is up to date" flag per
    // color - confirm against the upstream declaration.
    std::array computed = {};
};


// AccumulatorCaches struct provides per-thread accumulator caches, where each
// cache contains multiple entries for each of the possible king squares.
// When the accumulator needs to be refreshed, the cached entry is used to more
// efficiently update the accumulator, instead of rebuilding it from scratch.
// This idea was first described by Luecx (author of Koivisto) and
// is commonly referred to as "Finny Tables".
// NOTE(review): in this copy of the file the `template` keywords, std::array
// types, casts and the Cache/Accumulator/AccumulatorState uses below carry no
// argument lists (the text between angle brackets appears to be missing).
// Restore them from the upstream source before compiling.
struct AccumulatorCaches {

    // Constructs the caches and immediately initializes them from `networks`
    // by delegating to clear(networks).
    template
    AccumulatorCaches(const Networks& networks) {
        clear(networks);
    }

    // One cache: a table of entries addressed first by square (see operator[]).
    template
    struct alignas(CacheLineSize) Cache {

        // A single cached accumulator together with the board snapshot
        // (piece array and piece bitboard) it was computed for.
        struct alignas(CacheLineSize) Entry {
            std::array accumulation;
            std::array psqtAccumulation;
            std::array pieces;
            Bitboard   pieceBB;

            // To initialize a refresh entry, we set all its bitboards empty,
            // so we put the biases in the accumulation, without any weights on top
            void clear(const std::array& biases) {

                accumulation = biases;
                // Zero every byte from the start of psqtAccumulation up to the
                // end of the Entry, which covers all members declared after
                // `accumulation` in one call.
                std::memset(reinterpret_cast(this) + offsetof(Entry, psqtAccumulation), 0,
                            sizeof(Entry) - offsetof(Entry, psqtAccumulation));
            }
        };

        // Resets every entry of this cache using the biases of the given
        // network's feature transformer.
        template
        void clear(const Network& network) {
            for (auto& entries1D : entries)
                for (auto& entry : entries1D)
                    entry.clear(network.featureTransformer.biases);
        }

        // Returns the group of entries stored for square `sq`.
        std::array& operator[](Square sq) { return entries[sq]; }

        // Outer dimension has SQUARE_NB slots.
        std::array, SQUARE_NB> entries;
    };

    // Re-initializes both caches from the matching member of `networks`.
    template
    void clear(const Networks& networks) {
        big.clear(networks.big);
        small.clear(networks.small);
    }

    Cache big;
    Cache small;
};


// Bundles a big and a small accumulator with the diff object (of type
// FeatureSet::DiffType) stored alongside them.
template
struct AccumulatorState {
    Accumulator                   accumulatorBig;
    Accumulator                   accumulatorSmall;
    typename FeatureSet::DiffType diff;

    // Returns the accumulator selected at compile time by `Size`, which must
    // be TransformedFeatureDimensionsBig or TransformedFeatureDimensionsSmall
    // (enforced by the static_assert).
    template
    auto& acc() noexcept {
        static_assert(Size == TransformedFeatureDimensionsBig
                        || Size == TransformedFeatureDimensionsSmall,
                      "Invalid size for accumulator");

        if constexpr (Size == TransformedFeatureDimensionsBig)
            return accumulatorBig;
        else if constexpr (Size == TransformedFeatureDimensionsSmall)
            return accumulatorSmall;
    }

    // Const overload of acc(); same selection rule.
    template
    const auto& acc() const noexcept {
        static_assert(Size == TransformedFeatureDimensionsBig
                        || Size == TransformedFeatureDimensionsSmall,
                      "Invalid size for accumulator");

        if constexpr (Size == TransformedFeatureDimensionsBig)
            return accumulatorBig;
        else if constexpr (Size == TransformedFeatureDimensionsSmall)
            return accumulatorSmall;
    }

    // Stores `dp` as the new diff and clears the `computed` flags of both
    // accumulators.
    void reset(const typename FeatureSet::DiffType& dp) noexcept {
        diff = dp;
        accumulatorBig.computed.fill(false);
        accumulatorSmall.computed.fill(false);
    }

    // Clears the `computed` flags of both accumulators and returns a mutable
    // reference to the stored diff so the caller can fill it in place.
    typename FeatureSet::DiffType& reset() noexcept {
        accumulatorBig.computed.fill(false);
        accumulatorSmall.computed.fill(false);
        return diff;
    }
};


// Holds two fixed-capacity arrays of accumulator states (psq_accumulators and
// threat_accumulators, MaxSize slots each) plus a `size` counter.
// Only declarations are visible here; the member functions are defined
// elsewhere.
class AccumulatorStack {
   public:
    // Capacity of each accumulator array.
    static constexpr std::size_t MaxSize = MAX_PLY + 1;

    template
    [[nodiscard]] const AccumulatorState& latest() const noexcept;

    void reset() noexcept;
    std::pair push() noexcept;
    void pop() noexcept;

    template
    void evaluate(const Position& pos,
                  const FeatureTransformer& featureTransformer,
                  AccumulatorCaches::Cache& cache) noexcept;

   private:
    template
    [[nodiscard]] AccumulatorState& mut_latest() noexcept;

    template
    [[nodiscard]] const std::array, MaxSize>& accumulators() const noexcept;

    template
    [[nodiscard]] std::array, MaxSize>& mut_accumulators() noexcept;

    template
    void evaluate_side(Color perspective,
                       const Position& pos,
                       const FeatureTransformer& featureTransformer,
                       AccumulatorCaches::Cache& cache) noexcept;

    template
    [[nodiscard]] std::size_t find_last_usable_accumulator(Color perspective) const noexcept;

    template
    void forward_update_incremental(Color perspective,
                                    const Position& pos,
                                    const FeatureTransformer& featureTransformer,
                                    const std::size_t begin) noexcept;

    template
    void backward_update_incremental(Color perspective,
                                     const Position& pos,
                                     const FeatureTransformer& featureTransformer,
                                     const std::size_t end) noexcept;

    std::array, MaxSize> psq_accumulators;
    std::array, MaxSize> threat_accumulators;
    // Initialized to 1.
    std::size_t size = 1;
};

}  // namespace Stockfish::Eval::NNUE

#endif  // NNUE_ACCUMULATOR_H_INCLUDED

================================================
FILE: src/nnue/nnue_architecture.h
================================================
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)

Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Stockfish is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

// Input features and network structure used in NNUE evaluation function

#ifndef NNUE_ARCHITECTURE_H_INCLUDED
#define NNUE_ARCHITECTURE_H_INCLUDED

// NOTE(review): the three system #include directives below carry no header
// names in this copy of the file (the text between angle brackets appears to
// be missing). Restore them from the upstream source before compiling.
#include
#include
#include

#include "features/half_ka_v2_hm.h"
#include "features/full_threats.h"
#include "layers/affine_transform.h"
#include "layers/affine_transform_sparse_input.h"
#include "layers/clipped_relu.h"
#include "layers/sqr_clipped_relu.h"
#include "nnue_common.h"

namespace Stockfish::Eval::NNUE {

// Input features used in evaluation function
using ThreatFeatureSet = Features::FullThreats;
using PSQFeatureSet    = Features::HalfKAv2_hm;

// Number of input feature dimensions after conversion, followed by the
// L2/L3 sizes that accompany it - one set for the big network and one for
// the small network.
constexpr IndexType TransformedFeatureDimensionsBig = 1024;
constexpr int       L2Big                           = 31;
constexpr int       L3Big                           = 32;

constexpr IndexType TransformedFeatureDimensionsSmall = 128;
constexpr int       L2Small                           = 15;
constexpr int       L3Small                           = 32;

// Number of PSQT buckets and of layer stacks.
constexpr IndexType PSQTBuckets = 8;
constexpr IndexType LayerStacks = 8;

// If vector instructions are enabled, we update and refresh the
// accumulator tile by tile such that each tile fits in the CPU's
// vector registers.
static_assert(PSQTBuckets % 8 == 0, "Per feature PSQT values cannot be processed at granularity lower than 8 at a time."); template struct NetworkArchitecture { static constexpr IndexType TransformedFeatureDimensions = L1; static constexpr int FC_0_OUTPUTS = L2; static constexpr int FC_1_OUTPUTS = L3; Layers::AffineTransformSparseInput fc_0; Layers::SqrClippedReLU ac_sqr_0; Layers::ClippedReLU ac_0; Layers::AffineTransform fc_1; Layers::ClippedReLU ac_1; Layers::AffineTransform fc_2; // Hash value embedded in the evaluation file static constexpr std::uint32_t get_hash_value() { // input slice hash std::uint32_t hashValue = 0xEC42E90Du; hashValue ^= TransformedFeatureDimensions * 2; hashValue = decltype(fc_0)::get_hash_value(hashValue); hashValue = decltype(ac_0)::get_hash_value(hashValue); hashValue = decltype(fc_1)::get_hash_value(hashValue); hashValue = decltype(ac_1)::get_hash_value(hashValue); hashValue = decltype(fc_2)::get_hash_value(hashValue); return hashValue; } // Read network parameters bool read_parameters(std::istream& stream) { return fc_0.read_parameters(stream) && ac_0.read_parameters(stream) && fc_1.read_parameters(stream) && ac_1.read_parameters(stream) && fc_2.read_parameters(stream); } // Write network parameters bool write_parameters(std::ostream& stream) const { return fc_0.write_parameters(stream) && ac_0.write_parameters(stream) && fc_1.write_parameters(stream) && ac_1.write_parameters(stream) && fc_2.write_parameters(stream); } std::int32_t propagate(const TransformedFeatureType* transformedFeatures) const { struct alignas(CacheLineSize) Buffer { alignas(CacheLineSize) typename decltype(fc_0)::OutputBuffer fc_0_out; alignas(CacheLineSize) typename decltype(ac_sqr_0)::OutputType ac_sqr_0_out[ceil_to_multiple(FC_0_OUTPUTS * 2, 32)]; alignas(CacheLineSize) typename decltype(ac_0)::OutputBuffer ac_0_out; alignas(CacheLineSize) typename decltype(fc_1)::OutputBuffer fc_1_out; alignas(CacheLineSize) typename decltype(ac_1)::OutputBuffer ac_1_out; 
alignas(CacheLineSize) typename decltype(fc_2)::OutputBuffer fc_2_out; Buffer() { std::memset(this, 0, sizeof(*this)); } }; #if defined(__clang__) && (__APPLE__) // workaround for a bug reported with xcode 12 static thread_local auto tlsBuffer = std::make_unique(); // Access TLS only once, cache result. Buffer& buffer = *tlsBuffer; #else alignas(CacheLineSize) static thread_local Buffer buffer; #endif fc_0.propagate(transformedFeatures, buffer.fc_0_out); ac_sqr_0.propagate(buffer.fc_0_out, buffer.ac_sqr_0_out); ac_0.propagate(buffer.fc_0_out, buffer.ac_0_out); std::memcpy(buffer.ac_sqr_0_out + FC_0_OUTPUTS, buffer.ac_0_out, FC_0_OUTPUTS * sizeof(typename decltype(ac_0)::OutputType)); fc_1.propagate(buffer.ac_sqr_0_out, buffer.fc_1_out); ac_1.propagate(buffer.fc_1_out, buffer.ac_1_out); fc_2.propagate(buffer.ac_1_out, buffer.fc_2_out); // buffer.fc_0_out[FC_0_OUTPUTS] is such that 1.0 is equal to 127*(1< struct std::hash> { std::size_t operator()(const Stockfish::Eval::NNUE::NetworkArchitecture& arch) const noexcept { return arch.get_content_hash(); } }; #endif // #ifndef NNUE_ARCHITECTURE_H_INCLUDED ================================================ FILE: src/nnue/nnue_common.h ================================================ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. Stockfish is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . 
*/

// Constants used in NNUE evaluation function

#ifndef NNUE_COMMON_H_INCLUDED
#define NNUE_COMMON_H_INCLUDED

// NOTE(review): the #include directives without a quoted path below carry no
// header names in this copy of the file, and `template` below carries no
// parameter list (the text between angle brackets appears to be missing).
// Restore them from the upstream source before compiling.
#include
#include
#include
#include
#include
#include

#include "../misc.h"

// Architecture-specific intrinsics header, selected by the USE_* build macros.
#if defined(USE_AVX2)
    #include

#elif defined(USE_SSE41)
    #include

#elif defined(USE_SSSE3)
    #include

#elif defined(USE_SSE2)
    #include

#elif defined(USE_NEON)
    #include
#endif

namespace Stockfish::Eval::NNUE {

// Fixed-width integer types used for network parameters and indices.
using BiasType         = std::int16_t;
using ThreatWeightType = std::int8_t;
using WeightType       = std::int16_t;
using PSQTWeightType   = std::int32_t;
using IndexType        = std::uint32_t;

// Version of the evaluation file
constexpr std::uint32_t Version = 0x7AF32F20u;

// Constant used in evaluation value calculation
constexpr int OutputScale     = 16;
constexpr int WeightScaleBits = 6;

// Size of cache line (in bytes)
constexpr std::size_t CacheLineSize = 64;

// Marker string for LEB128-compressed data. The size excludes the
// terminating NUL (hence the `- 1`).
constexpr const char        Leb128MagicString[]   = "COMPRESSED_LEB128";
constexpr const std::size_t Leb128MagicStringSize = sizeof(Leb128MagicString) - 1;

// SIMD width (in bytes)
// Note: SimdWidth is left undefined when none of the macros below is set.
#if defined(USE_AVX2)
constexpr std::size_t SimdWidth = 32;

#elif defined(USE_SSE2)
constexpr std::size_t SimdWidth = 16;

#elif defined(USE_NEON)
constexpr std::size_t SimdWidth = 16;
#endif

constexpr std::size_t MaxSimdWidth = 32;

// Type of input feature after conversion
using TransformedFeatureType = std::uint8_t;

// Round n up to be a multiple of base.
// Computed as (n + base - 1) / base * base, so a value that is already a
// multiple of base is returned unchanged (for non-negative n, positive base).
template
constexpr IntType ceil_to_multiple(IntType n, IntType base) {
    return (n + base - 1) / base * base;
}


// Utility to read an integer (signed or unsigned, any size)
// from a stream in little-endian order. We swap the byte order after the read if
// necessary to return a result with the byte ordering of the compiling machine.
// NOTE(review): in this copy of the file `template`, reinterpret_cast,
// std::make_unsigned_t and the nested read_little_endian call carry no
// argument lists (the text between angle brackets appears to be missing).
// Restore them from the upstream source before compiling.
//
// Reads sizeof(IntType) bytes from `stream` and returns them as one IntType.
// The stream state is not checked here.
template
inline IntType read_little_endian(std::istream& stream) {
    IntType result;

    if (IsLittleEndian)
        // Host order already matches the file: read straight into the result.
        stream.read(reinterpret_cast(&result), sizeof(IntType));
    else
    {
        std::uint8_t         u[sizeof(IntType)];
        std::make_unsigned_t v = 0;

        stream.read(reinterpret_cast(u), sizeof(IntType));
        // Walk the bytes from last to first so that the last byte read ends up
        // in the most significant position of v.
        for (std::size_t i = 0; i < sizeof(IntType); ++i)
            v = (v << 8) | u[sizeof(IntType) - i - 1];

        // Copy the bit pattern of the unsigned value into the result.
        std::memcpy(&result, &v, sizeof(IntType));
    }

    return result;
}


// Utility to write an integer (signed or unsigned, any size)
// to a stream in little-endian order. We swap the byte order before the write if
// necessary to always write in little-endian order, independently of the byte
// ordering of the compiling machine.
template
inline void write_little_endian(std::ostream& stream, IntType value) {

    if (IsLittleEndian)
        stream.write(reinterpret_cast(&value), sizeof(IntType));
    else
    {
        std::uint8_t         u[sizeof(IntType)];
        std::make_unsigned_t v = value;

        std::size_t i = 0;
        // if constexpr to silence the warning about shift by 8
        if constexpr (sizeof(IntType) > 1)
        {
            // Emit the low byte first, then shift the next byte down.
            // Stops one byte early so the final byte is stored without a
            // trailing shift.
            for (; i + 1 < sizeof(IntType); ++i)
            {
                u[i] = std::uint8_t(v);
                v >>= 8;
            }
        }
        // Most significant byte goes last.
        u[i] = std::uint8_t(v);

        stream.write(reinterpret_cast(u), sizeof(IntType));
    }
}


// Read integers in bulk from a little-endian stream.
// This reads `count` integers from `stream` and puts them in the array `out`.
// On a little-endian host the whole range is read with a single call;
// otherwise each element is read and byte-swapped individually.
template
inline void read_little_endian(std::istream& stream, IntType* out, std::size_t count) {
    if (IsLittleEndian)
        stream.read(reinterpret_cast(out), sizeof(IntType) * count);
    else
        for (std::size_t i = 0; i < count; ++i)
            out[i] = read_little_endian(stream);
}


// Write integers in bulk to a little-endian stream.
// This takes `count` integers from the array `values` and writes them to `stream`.
// NOTE(review): in this copy of the file `template`, reinterpret_cast,
// std::array, std::is_signed_v and the read_little_endian call carry no
// argument lists (the text between angle brackets appears to be missing).
// Restore them from the upstream source before compiling.
//
// On a little-endian host the whole range is written with a single call;
// otherwise each element is written individually via the scalar overload.
template
inline void write_little_endian(std::ostream& stream, const IntType* values, std::size_t count) {
    if (IsLittleEndian)
        stream.write(reinterpret_cast(values), sizeof(IntType) * count);
    else
        for (std::size_t i = 0; i < count; ++i)
            write_little_endian(stream, values[i]);
}


// Read Count signed integers from `stream`, putting them in the array `out`.
// The stream is assumed to be compressed using the signed LEB128 format.
// See https://en.wikipedia.org/wiki/LEB128 for a description of the compression scheme.
//
// `buf`, `buf_pos` and `bytes_left` are passed by reference and updated, so
// several calls in a row can continue decoding from the same buffered data:
//   bytes_left - encoded bytes not yet consumed (decremented per byte)
//   buf        - staging buffer that is refilled from `stream`
//   buf_pos    - index of the next unread byte in `buf`
template
inline void read_leb_128_detail(std::istream&  stream,
                                std::array&    out,
                                std::uint32_t& bytes_left,
                                BufType&       buf,
                                std::uint32_t& buf_pos) {

    static_assert(std::is_signed_v, "Not implemented for unsigned types");
    static_assert(sizeof(IntType) <= 4, "Not implemented for types larger than 32 bit");

    IntType result = 0;
    size_t  shift = 0, i = 0;
    while (i < Count)
    {
        // Buffer exhausted: refill with at most the number of bytes still
        // expected, and restart from its beginning.
        if (buf_pos == buf.size())
        {
            stream.read(reinterpret_cast(buf.data()),
                        std::min(std::size_t(bytes_left), buf.size()));
            buf_pos = 0;
        }

        std::uint8_t byte = buf[buf_pos++];
        --bytes_left;
        // Merge the 7 payload bits at the current position; the modulo keeps
        // the shift amount below 32.
        result |= (byte & 0x7f) << (shift % 32);
        shift += 7;

        // A clear high bit marks the final byte of the current value.
        if ((byte & 0x80) == 0)
        {
            // If bit 6 of the final byte is set and fewer than 32 bits were
            // consumed, fill all bits above `shift` with ones (sign extension);
            // otherwise the value is stored as decoded.
            out[i++] = (shift >= 32 || (byte & 0x40) == 0) ? result : result | ~((1 << shift) - 1);
            result   = 0;
            shift    = 0;
        }
    }
}

// Reads a LEB128-compressed section from `stream` and decodes it into each of
// the arrays in `outs`, in argument order. The section consists of the magic
// string, an integer byte count read with read_little_endian, and then the
// encoded payload. The magic string and the final byte count are verified
// with assert() only.
template
inline void read_leb_128(std::istream& stream, Arrays&... outs) {
    // Check the presence of our LEB128 magic string
    char leb128MagicString[Leb128MagicStringSize];
    stream.read(leb128MagicString, Leb128MagicStringSize);
    assert(strncmp(Leb128MagicString, leb128MagicString, Leb128MagicStringSize) == 0);

    auto bytes_left = read_little_endian(stream);
    std::array    buf;
    // Start "at the end" of the buffer so that the first decoded byte
    // triggers a refill from the stream.
    std::uint32_t buf_pos = std::uint32_t(buf.size());

    // Decode every output array in turn, sharing the buffer and counters.
    (read_leb_128_detail(stream, outs, bytes_left, buf, buf_pos), ...);

    assert(bytes_left == 0);
}


// Write signed integers to a stream with LEB128 compression.
// This takes N integers from array values, compresses them with
// the LEB128 algorithm and writes the result on the stream s.
// See https://en.wikipedia.org/wiki/LEB128 for a description of the compression scheme. template inline void write_leb_128(std::ostream& stream, const std::array& values) { // Write our LEB128 magic string stream.write(Leb128MagicString, Leb128MagicStringSize); static_assert(std::is_signed_v, "Not implemented for unsigned types"); std::uint32_t byte_count = 0; for (std::size_t i = 0; i < Count; ++i) { IntType value = values[i]; std::uint8_t byte; do { byte = value & 0x7f; value >>= 7; ++byte_count; } while ((byte & 0x40) == 0 ? value != 0 : value != -1); } write_little_endian(stream, byte_count); const std::uint32_t BUF_SIZE = 4096; std::uint8_t buf[BUF_SIZE]; std::uint32_t buf_pos = 0; auto flush = [&]() { if (buf_pos > 0) { stream.write(reinterpret_cast(buf), buf_pos); buf_pos = 0; } }; auto write = [&](std::uint8_t b) { buf[buf_pos++] = b; if (buf_pos == BUF_SIZE) flush(); }; for (std::size_t i = 0; i < Count; ++i) { IntType value = values[i]; while (true) { std::uint8_t byte = value & 0x7f; value >>= 7; if ((byte & 0x40) == 0 ? value == 0 : value == -1) { write(byte); break; } write(byte | 0x80); } } flush(); } } // namespace Stockfish::Eval::NNUE #endif // #ifndef NNUE_COMMON_H_INCLUDED ================================================ FILE: src/nnue/nnue_feature_transformer.h ================================================ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. Stockfish is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with this program. If not, see . */ // A class that converts the input features of the NNUE evaluation function #ifndef NNUE_FEATURE_TRANSFORMER_H_INCLUDED #define NNUE_FEATURE_TRANSFORMER_H_INCLUDED #include #include #include #include #include #include "../position.h" #include "../types.h" #include "nnue_accumulator.h" #include "nnue_architecture.h" #include "nnue_common.h" #include "simd.h" namespace Stockfish::Eval::NNUE { // Returns the inverse of a permutation template constexpr std::array invert_permutation(const std::array& order) { std::array inverse{}; for (std::size_t i = 0; i < order.size(); i++) inverse[order[i]] = i; return inverse; } // Divide a byte region of size TotalSize to chunks of size // BlockSize, and permute the blocks by a given order template void permute(std::array& data, const std::array& order) { constexpr std::size_t TotalSize = N * sizeof(T); static_assert(TotalSize % (BlockSize * OrderSize) == 0, "ChunkSize * OrderSize must perfectly divide TotalSize"); constexpr std::size_t ProcessChunkSize = BlockSize * OrderSize; std::array buffer{}; std::byte* const bytes = reinterpret_cast(data.data()); for (std::size_t i = 0; i < TotalSize; i += ProcessChunkSize) { std::byte* const values = &bytes[i]; for (std::size_t j = 0; j < OrderSize; j++) { auto* const buffer_chunk = &buffer[j * BlockSize]; auto* const value_chunk = &values[order[j] * BlockSize]; std::copy(value_chunk, value_chunk + BlockSize, buffer_chunk); } std::copy(std::begin(buffer), std::end(buffer), values); } } // Input feature converter template class FeatureTransformer { static constexpr bool UseThreats = (TransformedFeatureDimensions == TransformedFeatureDimensionsBig); // Number of output dimensions for one side static constexpr IndexType HalfDimensions = TransformedFeatureDimensions; public: // Output type using OutputType = TransformedFeatureType; // Number of input/output dimensions static constexpr 
IndexType InputDimensions = PSQFeatureSet::Dimensions; static constexpr IndexType ThreatInputDimensions = ThreatFeatureSet::Dimensions; static constexpr IndexType TotalInputDimensions = InputDimensions + (UseThreats ? ThreatInputDimensions : 0); static constexpr IndexType OutputDimensions = HalfDimensions; // Size of forward propagation buffer static constexpr std::size_t BufferSize = OutputDimensions * sizeof(OutputType); // Store the order by which 128-bit blocks of a 1024-bit data must // be permuted so that calling packus on adjacent vectors of 16-bit // integers loaded from the data results in the pre-permutation order static constexpr auto PackusEpi16Order = []() -> std::array { #if defined(USE_AVX512) // _mm512_packus_epi16 after permutation: // | 0 | 2 | 4 | 6 | // Vector 0 // | 1 | 3 | 5 | 7 | // Vector 1 // | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | // Packed Result return {0, 2, 4, 6, 1, 3, 5, 7}; #elif defined(USE_AVX2) // _mm256_packus_epi16 after permutation: // | 0 | 2 | | 4 | 6 | // Vector 0, 2 // | 1 | 3 | | 5 | 7 | // Vector 1, 3 // | 0 | 1 | 2 | 3 | | 4 | 5 | 6 | 7 | // Packed Result return {0, 2, 1, 3, 4, 6, 5, 7}; #else return {0, 1, 2, 3, 4, 5, 6, 7}; #endif }(); static constexpr auto InversePackusEpi16Order = invert_permutation(PackusEpi16Order); static constexpr std::uint32_t combine_hash(std::initializer_list hashes) { std::uint32_t hash = 0; for (const auto component_hash : hashes) { hash = (hash << 1) | (hash >> 31); hash ^= component_hash; } return hash; } // Hash value embedded in the evaluation file static constexpr std::uint32_t get_hash_value() { return (UseThreats ? 
combine_hash({ThreatFeatureSet::HashValue, PSQFeatureSet::HashValue}) : PSQFeatureSet::HashValue) ^ (OutputDimensions * 2); } void permute_weights() { permute<16>(biases, PackusEpi16Order); permute<16>(weights, PackusEpi16Order); if constexpr (UseThreats) permute<8>(threatWeights, PackusEpi16Order); } void unpermute_weights() { permute<16>(biases, InversePackusEpi16Order); permute<16>(weights, InversePackusEpi16Order); if constexpr (UseThreats) permute<8>(threatWeights, InversePackusEpi16Order); } // Read network parameters bool read_parameters(std::istream& stream) { read_leb_128(stream, biases); if constexpr (UseThreats) { read_little_endian(stream, threatWeights.data(), ThreatInputDimensions * HalfDimensions); read_leb_128(stream, weights); read_leb_128(stream, threatPsqtWeights, psqtWeights); } else { read_leb_128(stream, weights); read_leb_128(stream, psqtWeights); } permute_weights(); return !stream.fail(); } // Write network parameters bool write_parameters(std::ostream& stream) const { std::unique_ptr copy = std::make_unique(*this); copy->unpermute_weights(); write_leb_128(stream, copy->biases); if constexpr (UseThreats) { write_little_endian(stream, copy->threatWeights.data(), ThreatInputDimensions * HalfDimensions); write_leb_128(stream, copy->weights); auto combinedPsqtWeights = std::make_unique>(); std::copy(std::begin(copy->threatPsqtWeights), std::begin(copy->threatPsqtWeights) + ThreatInputDimensions * PSQTBuckets, combinedPsqtWeights->begin()); std::copy(std::begin(copy->psqtWeights), std::begin(copy->psqtWeights) + InputDimensions * PSQTBuckets, combinedPsqtWeights->begin() + ThreatInputDimensions * PSQTBuckets); write_leb_128(stream, *combinedPsqtWeights); } else { write_leb_128(stream, copy->weights); write_leb_128(stream, copy->psqtWeights); } return !stream.fail(); } std::size_t get_content_hash() const { std::size_t h = 0; hash_combine(h, get_raw_data_hash(biases)); hash_combine(h, get_raw_data_hash(weights)); hash_combine(h, 
get_raw_data_hash(psqtWeights)); if constexpr (UseThreats) { hash_combine(h, get_raw_data_hash(threatWeights)); hash_combine(h, get_raw_data_hash(threatPsqtWeights)); } hash_combine(h, get_hash_value()); return h; } // Convert input features std::int32_t transform(const Position& pos, AccumulatorStack& accumulatorStack, AccumulatorCaches::Cache& cache, OutputType* output, int bucket) const { using namespace SIMD; accumulatorStack.evaluate(pos, *this, cache); const auto& accumulatorState = accumulatorStack.latest(); const auto& threatAccumulatorState = accumulatorStack.latest(); const Color perspectives[2] = {pos.side_to_move(), ~pos.side_to_move()}; const auto& psqtAccumulation = (accumulatorState.acc()).psqtAccumulation; auto psqt = (psqtAccumulation[perspectives[0]][bucket] - psqtAccumulation[perspectives[1]][bucket]); if constexpr (UseThreats) { const auto& threatPsqtAccumulation = (threatAccumulatorState.acc()).psqtAccumulation; psqt = (psqt + threatPsqtAccumulation[perspectives[0]][bucket] - threatPsqtAccumulation[perspectives[1]][bucket]) / 2; } else psqt /= 2; const auto& accumulation = (accumulatorState.acc()).accumulation; const auto& threatAccumulation = (threatAccumulatorState.acc()).accumulation; for (IndexType p = 0; p < 2; ++p) { const IndexType offset = (HalfDimensions / 2) * p; #if defined(VECTOR) constexpr IndexType OutputChunkSize = MaxChunkSize; static_assert((HalfDimensions / 2) % OutputChunkSize == 0); constexpr IndexType NumOutputChunks = HalfDimensions / 2 / OutputChunkSize; const vec_t Zero = vec_zero(); const vec_t One = vec_set_16(255); const vec_t* in0 = reinterpret_cast(&(accumulation[perspectives[p]][0])); const vec_t* in1 = reinterpret_cast(&(accumulation[perspectives[p]][HalfDimensions / 2])); vec_t* out = reinterpret_cast(output + offset); // Per the NNUE architecture, here we want to multiply pairs of // clipped elements and divide the product by 128. 
To do this, // we can naively perform min/max operations to clip each of the // four int16 vectors, mullo pairs together, then pack them into // one int8 vector. However, there exists a faster way. // The idea here is to use the implicit clipping from packus to // save us two vec_max_16 instructions. This clipping works due // to the fact that any int16 integer below zero will be zeroed // on packus. // Consider the case where the second element is negative. // If we do standard clipping, that element will be zero, which // means our pairwise product is zero. If we perform packus and // remove the lower-side clip for the second element, then our // product before packus will be negative, and is zeroed on pack. // The two operations produce equivalent results, but the second // one (using packus) saves one max operation per pair. // But here we run into a problem: mullo does not preserve the // sign of the multiplication. We can get around this by doing // mulhi, which keeps the sign. But that requires an additional // tweak. // mulhi cuts off the last 16 bits of the resulting product, // which is the same as performing a rightward shift of 16 bits. // We can use this to our advantage. Recall that we want to // divide the final product by 128, which is equivalent to a // 7-bit right shift. Intuitively, if we shift the clipped // value left by 9, and perform mulhi, which shifts the product // right by 16 bits, then we will net a right shift of 7 bits. // However, this won't work as intended. Since we clip the // values to have a maximum value of 127, shifting it by 9 bits // might occupy the signed bit, resulting in some positive // values being interpreted as negative after the shift. // There is a way, however, to get around this limitation. When // loading the network, scale accumulator weights and biases by // 2.
To get the same pairwise multiplication result as before, // we need to divide the product by 128 * 2 * 2 = 512, which // amounts to a right shift of 9 bits. So now we only have to // shift left by 7 bits, perform mulhi (shifts right by 16 bits) // and net a 9 bit right shift. Since we scaled everything by // two, the values are clipped at 127 * 2 = 254, which occupies // 8 bits. Shifting it by 7 bits left will no longer occupy the // signed bit, so we are safe. // Note that on NEON processors, we shift left by 6 instead // because the instruction "vqdmulhq_s16" also doubles the // return value after the multiplication, adding an extra shift // to the left by 1, so we compensate by shifting less before // the multiplication. constexpr int shift = #if defined(USE_SSE2) 7; #else 6; #endif if constexpr (UseThreats) { const vec_t* tin0 = reinterpret_cast(&(threatAccumulation[perspectives[p]][0])); const vec_t* tin1 = reinterpret_cast( &(threatAccumulation[perspectives[p]][HalfDimensions / 2])); for (IndexType j = 0; j < NumOutputChunks; ++j) { const vec_t acc0a = vec_add_16(in0[j * 2 + 0], tin0[j * 2 + 0]); const vec_t acc0b = vec_add_16(in0[j * 2 + 1], tin0[j * 2 + 1]); const vec_t acc1a = vec_add_16(in1[j * 2 + 0], tin1[j * 2 + 0]); const vec_t acc1b = vec_add_16(in1[j * 2 + 1], tin1[j * 2 + 1]); const vec_t sum0a = vec_slli_16(vec_max_16(vec_min_16(acc0a, One), Zero), shift); const vec_t sum0b = vec_slli_16(vec_max_16(vec_min_16(acc0b, One), Zero), shift); const vec_t sum1a = vec_min_16(acc1a, One); const vec_t sum1b = vec_min_16(acc1b, One); const vec_t pa = vec_mulhi_16(sum0a, sum1a); const vec_t pb = vec_mulhi_16(sum0b, sum1b); out[j] = vec_packus_16(pa, pb); } } else { for (IndexType j = 0; j < NumOutputChunks; ++j) { const vec_t sum0a = vec_slli_16(vec_max_16(vec_min_16(in0[j * 2 + 0], One), Zero), shift); const vec_t sum0b = vec_slli_16(vec_max_16(vec_min_16(in0[j * 2 + 1], One), Zero), shift); const vec_t sum1a = vec_min_16(in1[j * 2 + 0], One); const vec_t 
sum1b = vec_min_16(in1[j * 2 + 1], One); const vec_t pa = vec_mulhi_16(sum0a, sum1a); const vec_t pb = vec_mulhi_16(sum0b, sum1b); out[j] = vec_packus_16(pa, pb); } } #else for (IndexType j = 0; j < HalfDimensions / 2; ++j) { BiasType sum0 = accumulation[static_cast(perspectives[p])][j + 0]; BiasType sum1 = accumulation[static_cast(perspectives[p])][j + HalfDimensions / 2]; if constexpr (UseThreats) { sum0 += threatAccumulation[static_cast(perspectives[p])][j + 0]; sum1 += threatAccumulation[static_cast(perspectives[p])][j + HalfDimensions / 2]; } sum0 = std::clamp(sum0, 0, 255); sum1 = std::clamp(sum1, 0, 255); output[offset + j] = static_cast(unsigned(sum0 * sum1) / 512); } #endif } return psqt; } // end of function transform() alignas(CacheLineSize) std::array biases; alignas(CacheLineSize) std::array weights; alignas(CacheLineSize) std::array threatWeights; alignas(CacheLineSize) std::array psqtWeights; alignas(CacheLineSize) std::array threatPsqtWeights; }; } // namespace Stockfish::Eval::NNUE template struct std::hash> { std::size_t operator()(const Stockfish::Eval::NNUE::FeatureTransformer& ft) const noexcept { return ft.get_content_hash(); } }; #endif // #ifndef NNUE_FEATURE_TRANSFORMER_H_INCLUDED ================================================ FILE: src/nnue/nnue_misc.cpp ================================================ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. Stockfish is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with this program. If not, see . */ // Code for calculating NNUE evaluation function #include "nnue_misc.h" #include #include #include #include #include #include #include #include #include #include "../position.h" #include "../types.h" #include "../uci.h" #include "network.h" #include "nnue_accumulator.h" namespace Stockfish::Eval::NNUE { constexpr std::string_view PieceToChar(" PNBRQK pnbrqk"); namespace { // Converts a Value into (centi)pawns and writes it in a buffer. // The buffer must have capacity for at least 5 chars. void format_cp_compact(Value v, char* buffer, const Position& pos) { buffer[0] = (v < 0 ? '-' : v > 0 ? '+' : ' '); int cp = std::abs(UCIEngine::to_cp(v, pos)); if (cp >= 10000) { buffer[1] = '0' + cp / 10000; cp %= 10000; buffer[2] = '0' + cp / 1000; cp %= 1000; buffer[3] = '0' + cp / 100; buffer[4] = ' '; } else if (cp >= 1000) { buffer[1] = '0' + cp / 1000; cp %= 1000; buffer[2] = '0' + cp / 100; cp %= 100; buffer[3] = '.'; buffer[4] = '0' + cp / 10; } else { buffer[1] = '0' + cp / 100; cp %= 100; buffer[2] = '.'; buffer[3] = '0' + cp / 10; cp %= 10; buffer[4] = '0' + cp / 1; } } // Converts a Value into pawns, always keeping two decimals void format_cp_aligned_dot(Value v, std::stringstream& stream, const Position& pos) { const double pawns = std::abs(0.01 * UCIEngine::to_cp(v, pos)); stream << (v < 0 ? '-' : v > 0 ? '+' : ' ') << std::setiosflags(std::ios::fixed) << std::setw(6) << std::setprecision(2) << pawns; } } // Returns a string with the value of each piece on a board, // and a table for (PSQT, Layers) values bucket by bucket. 
std::string trace(Position& pos, const Eval::NNUE::Networks& networks, Eval::NNUE::AccumulatorCaches& caches) { std::stringstream ss; char board[3 * 8 + 1][8 * 8 + 2]; std::memset(board, ' ', sizeof(board)); for (int row = 0; row < 3 * 8 + 1; ++row) board[row][8 * 8 + 1] = '\0'; // A lambda to output one box of the board auto writeSquare = [&board, &pos](File file, Rank rank, Piece pc, Value value) { const int x = int(file) * 8; const int y = (7 - int(rank)) * 3; for (int i = 1; i < 8; ++i) board[y][x + i] = board[y + 3][x + i] = '-'; for (int i = 1; i < 3; ++i) board[y + i][x] = board[y + i][x + 8] = '|'; board[y][x] = board[y][x + 8] = board[y + 3][x + 8] = board[y + 3][x] = '+'; if (pc != NO_PIECE) board[y + 1][x + 4] = PieceToChar[pc]; if (is_valid(value)) format_cp_compact(value, &board[y + 2][x + 2], pos); }; auto accumulators = std::make_unique(); // We estimate the value of each piece by doing a differential evaluation from // the current base eval, simulating the removal of the piece from its square. auto [psqt, positional] = networks.big.evaluate(pos, *accumulators, caches.big); Value base = psqt + positional; base = pos.side_to_move() == WHITE ? base : -base; for (File f = FILE_A; f <= FILE_H; ++f) for (Rank r = RANK_1; r <= RANK_8; ++r) { Square sq = make_square(f, r); Piece pc = pos.piece_on(sq); Value v = VALUE_NONE; if (pc != NO_PIECE && type_of(pc) != KING) { pos.remove_piece(sq); accumulators->reset(); std::tie(psqt, positional) = networks.big.evaluate(pos, *accumulators, caches.big); Value eval = psqt + positional; eval = pos.side_to_move() == WHITE ? eval : -eval; v = base - eval; pos.put_piece(pc, sq); } writeSquare(f, r, pc, v); } ss << " NNUE derived piece values:\n"; for (int row = 0; row < 3 * 8 + 1; ++row) ss << board[row] << '\n'; ss << '\n'; accumulators->reset(); auto t = networks.big.trace_evaluate(pos, *accumulators, caches.big); ss << " NNUE network contributions " << (pos.side_to_move() == WHITE ? 
"(White to move)" : "(Black to move)") << std::endl << "+------------+------------+------------+------------+\n" << "| Bucket | Material | Positional | Total |\n" << "| | (PSQT) | (Layers) | |\n" << "+------------+------------+------------+------------+\n"; for (std::size_t bucket = 0; bucket < LayerStacks; ++bucket) { ss << "| " << bucket << " " // << " | "; format_cp_aligned_dot(t.psqt[bucket], ss, pos); ss << " " // << " | "; format_cp_aligned_dot(t.positional[bucket], ss, pos); ss << " " // << " | "; format_cp_aligned_dot(t.psqt[bucket] + t.positional[bucket], ss, pos); ss << " " // << " |"; if (bucket == t.correctBucket) ss << " <-- this bucket is used"; ss << '\n'; } ss << "+------------+------------+------------+------------+\n"; return ss.str(); } } // namespace Stockfish::Eval::NNUE ================================================ FILE: src/nnue/nnue_misc.h ================================================ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. Stockfish is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #ifndef NNUE_MISC_H_INCLUDED #define NNUE_MISC_H_INCLUDED #include #include #include #include "../misc.h" #include "../types.h" #include "nnue_architecture.h" namespace Stockfish { class Position; namespace Eval::NNUE { // EvalFile uses fixed string types because it's part of the network structure which must be trivial. 
struct EvalFile { // Default net name, will use one of the EvalFileDefaultName* macros defined // in evaluate.h FixedString<256> defaultName; // Selected net name, either via uci option or default FixedString<256> current; // Net description extracted from the net file FixedString<256> netDescription; }; struct NnueEvalTrace { static_assert(LayerStacks == PSQTBuckets); Value psqt[LayerStacks]; Value positional[LayerStacks]; std::size_t correctBucket; }; struct Networks; struct AccumulatorCaches; std::string trace(Position& pos, const Networks& networks, AccumulatorCaches& caches); } // namespace Stockfish::Eval::NNUE } // namespace Stockfish template<> struct std::hash { std::size_t operator()(const Stockfish::Eval::NNUE::EvalFile& evalFile) const noexcept { std::size_t h = 0; Stockfish::hash_combine(h, evalFile.defaultName); Stockfish::hash_combine(h, evalFile.current); Stockfish::hash_combine(h, evalFile.netDescription); return h; } }; #endif // #ifndef NNUE_MISC_H_INCLUDED ================================================ FILE: src/nnue/simd.h ================================================ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. Stockfish is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . 
*/ #ifndef NNUE_SIMD_H_INCLUDED #define NNUE_SIMD_H_INCLUDED #if defined(USE_AVX2) #include #elif defined(USE_SSE41) #include #elif defined(USE_SSSE3) #include #elif defined(USE_SSE2) #include #elif defined(USE_NEON) #include #endif #include "../types.h" #include "nnue_common.h" namespace Stockfish::Eval::NNUE::SIMD { // If vector instructions are enabled, we update and refresh the // accumulator tile by tile such that each tile fits in the CPU's // vector registers. #define VECTOR #ifdef USE_AVX512 using vec_t = __m512i; using vec_i8_t = __m256i; using vec128_t = __m128i; using psqt_vec_t = __m256i; using vec_uint_t = __m512i; #define vec_load(a) _mm512_load_si512(a) #define vec_store(a, b) _mm512_store_si512(a, b) #define vec_convert_8_16(a) _mm512_cvtepi8_epi16(a) #define vec_add_16(a, b) _mm512_add_epi16(a, b) #define vec_sub_16(a, b) _mm512_sub_epi16(a, b) #define vec_mulhi_16(a, b) _mm512_mulhi_epi16(a, b) #define vec_zero() _mm512_setzero_epi32() #define vec_set_16(a) _mm512_set1_epi16(a) #define vec_max_16(a, b) _mm512_max_epi16(a, b) #define vec_min_16(a, b) _mm512_min_epi16(a, b) #define vec_slli_16(a, b) _mm512_slli_epi16(a, b) // Inverse permuted at load time #define vec_packus_16(a, b) _mm512_packus_epi16(a, b) #define vec_load_psqt(a) _mm256_load_si256(a) #define vec_store_psqt(a, b) _mm256_store_si256(a, b) #define vec_add_psqt_32(a, b) _mm256_add_epi32(a, b) #define vec_sub_psqt_32(a, b) _mm256_sub_epi32(a, b) #define vec_zero_psqt() _mm256_setzero_si256() #ifdef USE_SSSE3 #define vec_nnz(a) _mm512_cmpgt_epi32_mask(a, _mm512_setzero_si512()) #endif #define vec128_zero _mm_setzero_si128() #define vec128_set_16(a) _mm_set1_epi16(a) #define vec128_load(a) _mm_load_si128(a) #define vec128_storeu(a, b) _mm_storeu_si128(a, b) #define vec128_add(a, b) _mm_add_epi16(a, b) #define NumRegistersSIMD 16 #define MaxChunkSize 64 #elif USE_AVX2 using vec_t = __m256i; using vec_i8_t = __m128i; using vec128_t = __m128i; using psqt_vec_t = __m256i; using vec_uint_t = 
__m256i; #define vec_load(a) _mm256_load_si256(a) #define vec_store(a, b) _mm256_store_si256(a, b) #define vec_convert_8_16(a) _mm256_cvtepi8_epi16(a) #define vec_add_16(a, b) _mm256_add_epi16(a, b) #define vec_sub_16(a, b) _mm256_sub_epi16(a, b) #define vec_mulhi_16(a, b) _mm256_mulhi_epi16(a, b) #define vec_zero() _mm256_setzero_si256() #define vec_set_16(a) _mm256_set1_epi16(a) #define vec_max_16(a, b) _mm256_max_epi16(a, b) #define vec_min_16(a, b) _mm256_min_epi16(a, b) #define vec_slli_16(a, b) _mm256_slli_epi16(a, b) // Inverse permuted at load time #define vec_packus_16(a, b) _mm256_packus_epi16(a, b) #define vec_load_psqt(a) _mm256_load_si256(a) #define vec_store_psqt(a, b) _mm256_store_si256(a, b) #define vec_add_psqt_32(a, b) _mm256_add_epi32(a, b) #define vec_sub_psqt_32(a, b) _mm256_sub_epi32(a, b) #define vec_zero_psqt() _mm256_setzero_si256() #ifdef USE_SSSE3 #if defined(USE_VNNI) && !defined(USE_AVXVNNI) #define vec_nnz(a) _mm256_cmpgt_epi32_mask(a, _mm256_setzero_si256()) #else #define vec_nnz(a) \ _mm256_movemask_ps( \ _mm256_castsi256_ps(_mm256_cmpgt_epi32(a, _mm256_setzero_si256()))) #endif #endif #define vec128_zero _mm_setzero_si128() #define vec128_set_16(a) _mm_set1_epi16(a) #define vec128_load(a) _mm_load_si128(a) #define vec128_storeu(a, b) _mm_storeu_si128(a, b) #define vec128_add(a, b) _mm_add_epi16(a, b) #define NumRegistersSIMD 12 #define MaxChunkSize 32 #elif USE_SSE2 using vec_t = __m128i; using vec_i8_t = std::uint64_t; // for the correct size -- will be loaded into an xmm reg using vec128_t = __m128i; using psqt_vec_t = __m128i; using vec_uint_t = __m128i; #define vec_load(a) (*(a)) #define vec_store(a, b) *(a) = (b) #define vec_add_16(a, b) _mm_add_epi16(a, b) #define vec_sub_16(a, b) _mm_sub_epi16(a, b) #define vec_mulhi_16(a, b) _mm_mulhi_epi16(a, b) #define vec_zero() _mm_setzero_si128() #define vec_set_16(a) _mm_set1_epi16(a) #define vec_max_16(a, b) _mm_max_epi16(a, b) #define vec_min_16(a, b) _mm_min_epi16(a, b) #define 
vec_slli_16(a, b) _mm_slli_epi16(a, b) #define vec_packus_16(a, b) _mm_packus_epi16(a, b) #define vec_load_psqt(a) (*(a)) #define vec_store_psqt(a, b) *(a) = (b) #define vec_add_psqt_32(a, b) _mm_add_epi32(a, b) #define vec_sub_psqt_32(a, b) _mm_sub_epi32(a, b) #define vec_zero_psqt() _mm_setzero_si128() #ifdef USE_SSSE3 #define vec_nnz(a) \ _mm_movemask_ps(_mm_castsi128_ps(_mm_cmpgt_epi32(a, _mm_setzero_si128()))) #endif #ifdef __i386__ inline __m128i _mm_cvtsi64_si128(int64_t val) { return _mm_loadl_epi64(reinterpret_cast(&val)); } #endif #ifdef USE_SSE41 #define vec_convert_8_16(a) _mm_cvtepi8_epi16(_mm_cvtsi64_si128(static_cast(a))) #else // Credit: Yoshie2000 inline __m128i vec_convert_8_16(uint64_t x) { __m128i v8 = _mm_cvtsi64_si128(static_cast(x)); __m128i sign = _mm_cmpgt_epi8(_mm_setzero_si128(), v8); return _mm_unpacklo_epi8(v8, sign); } #endif #define vec128_zero _mm_setzero_si128() #define vec128_set_16(a) _mm_set1_epi16(a) #define vec128_load(a) _mm_load_si128(a) #define vec128_storeu(a, b) _mm_storeu_si128(a, b) #define vec128_add(a, b) _mm_add_epi16(a, b) #define NumRegistersSIMD (Is64Bit ? 
12 : 6) #define MaxChunkSize 16 #elif USE_NEON using vec_i8x8_t __attribute__((may_alias)) = int8x8_t; using vec_i16x8_t __attribute__((may_alias)) = int16x8_t; using vec_i8x16_t __attribute__((may_alias)) = int8x16_t; using vec_u16x8_t __attribute__((may_alias)) = uint16x8_t; using vec_i32x4_t __attribute__((may_alias)) = int32x4_t; using vec_t __attribute__((may_alias)) = int16x8_t; using vec_i8_t __attribute__((may_alias)) = int8x16_t; using psqt_vec_t __attribute__((may_alias)) = int32x4_t; using vec128_t __attribute__((may_alias)) = uint16x8_t; using vec_uint_t __attribute__((may_alias)) = uint32x4_t; #define vec_load(a) (*(a)) #define vec_store(a, b) *(a) = (b) #define vec_add_16(a, b) vaddq_s16(a, b) #define vec_sub_16(a, b) vsubq_s16(a, b) #define vec_mulhi_16(a, b) vqdmulhq_s16(a, b) #define vec_zero() vec_t{0} #define vec_set_16(a) vdupq_n_s16(a) #define vec_max_16(a, b) vmaxq_s16(a, b) #define vec_min_16(a, b) vminq_s16(a, b) #define vec_slli_16(a, b) vshlq_s16(a, vec_set_16(b)) #define vec_packus_16(a, b) reinterpret_cast(vcombine_u8(vqmovun_s16(a), vqmovun_s16(b))) #define vec_load_psqt(a) (*(a)) #define vec_store_psqt(a, b) *(a) = (b) #define vec_add_psqt_32(a, b) vaddq_s32(a, b) #define vec_sub_psqt_32(a, b) vsubq_s32(a, b) #define vec_zero_psqt() psqt_vec_t{0} static constexpr std::uint32_t Mask[4] = {1, 2, 4, 8}; #define vec_nnz(a) vaddvq_u32(vandq_u32(vtstq_u32(a, a), vld1q_u32(Mask))) #define vec128_zero vdupq_n_u16(0) #define vec128_set_16(a) vdupq_n_u16(a) #define vec128_load(a) vld1q_u16(reinterpret_cast(a)) #define vec128_storeu(a, b) vst1q_u16(reinterpret_cast(a), b) #define vec128_add(a, b) vaddq_u16(a, b) #define NumRegistersSIMD 16 #define MaxChunkSize 16 #ifndef __aarch64__ // Single instruction doesn't exist on 32-bit ARM inline int16x8_t vmovl_high_s8(int8x16_t val) { return vmovl_s8(vget_high_s8(val)); } #endif #else #undef VECTOR #endif struct Vec16Wrapper { #ifdef VECTOR using type = vec_t; static type add(const type& lhs, const 
type& rhs) { return vec_add_16(lhs, rhs); } static type sub(const type& lhs, const type& rhs) { return vec_sub_16(lhs, rhs); } #else using type = BiasType; static type add(const type& lhs, const type& rhs) { return lhs + rhs; } static type sub(const type& lhs, const type& rhs) { return lhs - rhs; } #endif }; struct Vec32Wrapper { #ifdef VECTOR using type = psqt_vec_t; static type add(const type& lhs, const type& rhs) { return vec_add_psqt_32(lhs, rhs); } static type sub(const type& lhs, const type& rhs) { return vec_sub_psqt_32(lhs, rhs); } #else using type = PSQTWeightType; static type add(const type& lhs, const type& rhs) { return lhs + rhs; } static type sub(const type& lhs, const type& rhs) { return lhs - rhs; } #endif }; enum UpdateOperation { Add, Sub }; template = true> typename VecWrapper::type fused(const typename VecWrapper::type& in) { return in; } template, bool> = true, std::enable_if_t = true> typename VecWrapper::type fused(const typename VecWrapper::type& in, const T& operand, const Ts&... 
operands) { switch (update_op) { case Add : return fused(VecWrapper::add(in, operand), operands...); case Sub : return fused(VecWrapper::sub(in, operand), operands...); default : static_assert(update_op == Add || update_op == Sub, "Only Add and Sub are currently supported."); return typename VecWrapper::type(); } } #if defined(USE_AVX512) [[maybe_unused]] static int m512_hadd(__m512i sum, int bias) { return _mm512_reduce_add_epi32(sum) + bias; } [[maybe_unused]] static void m512_add_dpbusd_epi32(__m512i& acc, __m512i a, __m512i b) { #if defined(USE_VNNI) acc = _mm512_dpbusd_epi32(acc, a, b); #else __m512i product0 = _mm512_maddubs_epi16(a, b); product0 = _mm512_madd_epi16(product0, _mm512_set1_epi16(1)); acc = _mm512_add_epi32(acc, product0); #endif } #endif #if defined(USE_AVX2) [[maybe_unused]] static int m256_hadd(__m256i sum, int bias) { __m128i sum128 = _mm_add_epi32(_mm256_castsi256_si128(sum), _mm256_extracti128_si256(sum, 1)); sum128 = _mm_add_epi32(sum128, _mm_shuffle_epi32(sum128, _MM_PERM_BADC)); sum128 = _mm_add_epi32(sum128, _mm_shuffle_epi32(sum128, _MM_PERM_CDAB)); return _mm_cvtsi128_si32(sum128) + bias; } [[maybe_unused]] static void m256_add_dpbusd_epi32(__m256i& acc, __m256i a, __m256i b) { #if defined(USE_VNNI) acc = _mm256_dpbusd_epi32(acc, a, b); #else __m256i product0 = _mm256_maddubs_epi16(a, b); product0 = _mm256_madd_epi16(product0, _mm256_set1_epi16(1)); acc = _mm256_add_epi32(acc, product0); #endif } #endif #if defined(USE_SSSE3) [[maybe_unused]] static int m128_hadd(__m128i sum, int bias) { sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0x4E)); //_MM_PERM_BADC sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0xB1)); //_MM_PERM_CDAB return _mm_cvtsi128_si32(sum) + bias; } [[maybe_unused]] static void m128_add_dpbusd_epi32(__m128i& acc, __m128i a, __m128i b) { __m128i product0 = _mm_maddubs_epi16(a, b); product0 = _mm_madd_epi16(product0, _mm_set1_epi16(1)); acc = _mm_add_epi32(acc, product0); } #endif #if defined(USE_NEON_DOTPROD) 
[[maybe_unused]] static void dotprod_m128_add_dpbusd_epi32(int32x4_t& acc, int8x16_t a, int8x16_t b) { acc = vdotq_s32(acc, a, b); } #endif #if defined(USE_NEON) [[maybe_unused]] static int neon_m128_reduce_add_epi32(int32x4_t s) { #if USE_NEON >= 8 return vaddvq_s32(s); #else return s[0] + s[1] + s[2] + s[3]; #endif } [[maybe_unused]] static int neon_m128_hadd(int32x4_t sum, int bias) { return neon_m128_reduce_add_epi32(sum) + bias; } #endif #if USE_NEON >= 8 [[maybe_unused]] static void neon_m128_add_dpbusd_epi32(int32x4_t& acc, int8x16_t a, int8x16_t b) { int16x8_t product0 = vmull_s8(vget_low_s8(a), vget_low_s8(b)); int16x8_t product1 = vmull_high_s8(a, b); int16x8_t sum = vpaddq_s16(product0, product1); acc = vpadalq_s16(acc, sum); } #endif // Compute optimal SIMD register count for feature transformer accumulation. template class SIMDTiling { #ifdef VECTOR // We use __m* types as template arguments, which causes GCC to emit warnings // about losing some attribute information. This is irrelevant to us as we // only take their size, so the following pragma are harmless. 
#if defined(__GNUC__) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wignored-attributes" #endif template static constexpr int BestRegisterCount() { constexpr std::size_t RegisterSize = sizeof(SIMDRegisterType); constexpr std::size_t LaneSize = sizeof(LaneType); static_assert(RegisterSize >= LaneSize); static_assert(MaxRegisters <= NumRegistersSIMD); static_assert(MaxRegisters > 0); static_assert(NumRegistersSIMD > 0); static_assert(RegisterSize % LaneSize == 0); static_assert((NumLanes * LaneSize) % RegisterSize == 0); const int ideal = (NumLanes * LaneSize) / RegisterSize; if (ideal <= MaxRegisters) return ideal; // Look for the largest divisor of the ideal register count that is smaller than MaxRegisters for (int divisor = MaxRegisters; divisor > 1; --divisor) if (ideal % divisor == 0) return divisor; return 1; } #if defined(__GNUC__) #pragma GCC diagnostic pop #endif public: static constexpr int NumRegs = BestRegisterCount(); static constexpr int NumPsqtRegs = BestRegisterCount(); static constexpr IndexType TileHeight = NumRegs * sizeof(vec_t) / 2; static constexpr IndexType PsqtTileHeight = NumPsqtRegs * sizeof(psqt_vec_t) / 4; static_assert(HalfDimensions % TileHeight == 0, "TileHeight must divide HalfDimensions"); static_assert(PSQTBuckets % PsqtTileHeight == 0, "PsqtTileHeight must divide PSQTBuckets"); #endif }; } #endif ================================================ FILE: src/numa.h ================================================ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. 
Stockfish is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #ifndef NUMA_H_INCLUDED #define NUMA_H_INCLUDED #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "shm.h" // We support linux very well, but we explicitly do NOT support Android, // because there is no affected systems, not worth maintaining. #if defined(__linux__) && !defined(__ANDROID__) #if !defined(_GNU_SOURCE) #define _GNU_SOURCE #endif #include #elif defined(_WIN64) #if _WIN32_WINNT < 0x0601 #undef _WIN32_WINNT #define _WIN32_WINNT 0x0601 // Force to include needed API prototypes #endif // On Windows each processor group can have up to 64 processors. 
// https://learn.microsoft.com/en-us/windows/win32/procthread/processor-groups static constexpr size_t WIN_PROCESSOR_GROUP_SIZE = 64; #if !defined(NOMINMAX) #define NOMINMAX #endif #include #if defined small #undef small #endif // https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-setthreadselectedcpusetmasks using SetThreadSelectedCpuSetMasks_t = BOOL (*)(HANDLE, PGROUP_AFFINITY, USHORT); // https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-getthreadselectedcpusetmasks using GetThreadSelectedCpuSetMasks_t = BOOL (*)(HANDLE, PGROUP_AFFINITY, USHORT, PUSHORT); #endif #include "misc.h" namespace Stockfish { using CpuIndex = size_t; using NumaIndex = size_t; inline CpuIndex get_hardware_concurrency() { CpuIndex concurrency = std::thread::hardware_concurrency(); // Get all processors across all processor groups on windows, since // hardware_concurrency() only returns the number of processors in // the first group, because only these are available to std::thread. #ifdef _WIN64 concurrency = std::max(concurrency, GetActiveProcessorCount(ALL_PROCESSOR_GROUPS)); #endif return concurrency; } inline const CpuIndex SYSTEM_THREADS_NB = std::max(1, get_hardware_concurrency()); #if defined(_WIN64) struct WindowsAffinity { std::optional> oldApi; std::optional> newApi; // We also provide diagnostic for when the affinity is set to nullopt // whether it was due to being indeterminate. If affinity is indeterminate // it is best to assume it is not set at all, so consistent with the meaning // of the nullopt affinity. 
bool isNewDeterminate = true; bool isOldDeterminate = true; std::optional> get_combined() const { if (!oldApi.has_value()) return newApi; if (!newApi.has_value()) return oldApi; std::set intersect; std::set_intersection(oldApi->begin(), oldApi->end(), newApi->begin(), newApi->end(), std::inserter(intersect, intersect.begin())); return intersect; } // Since Windows 11 and Windows Server 2022 thread affinities can span // processor groups and can be set as such by a new WinAPI function. However, // we may need to force using the old API if we detect that the process has // affinity set by the old API already and we want to override that. Due to the // limitations of the old API we cannot detect its use reliably. There will be // cases where we detect not use but it has actually been used and vice versa. bool likely_used_old_api() const { return oldApi.has_value() || !isOldDeterminate; } }; inline std::pair> get_process_group_affinity() { // GetProcessGroupAffinity requires the GroupArray argument to be // aligned to 4 bytes instead of just 2. static constexpr size_t GroupArrayMinimumAlignment = 4; static_assert(GroupArrayMinimumAlignment >= alignof(USHORT)); // The function should succeed the second time, but it may fail if the group // affinity has changed between GetProcessGroupAffinity calls. In such case // we consider this a hard error, as we Cannot work with unstable affinities // anyway. 
static constexpr int MAX_TRIES = 2; USHORT GroupCount = 1; for (int i = 0; i < MAX_TRIES; ++i) { auto GroupArray = std::make_unique( GroupCount + (GroupArrayMinimumAlignment / alignof(USHORT) - 1)); USHORT* GroupArrayAligned = align_ptr_up(GroupArray.get()); const BOOL status = GetProcessGroupAffinity(GetCurrentProcess(), &GroupCount, GroupArrayAligned); if (status == 0 && GetLastError() != ERROR_INSUFFICIENT_BUFFER) { break; } if (status != 0) { return std::make_pair(status, std::vector(GroupArrayAligned, GroupArrayAligned + GroupCount)); } } return std::make_pair(0, std::vector()); } // On Windows there are two ways to set affinity, and therefore 2 ways to get it. // These are not consistent, so we have to check both. In some cases it is actually // not possible to determine affinity. For example when two different threads have // affinity on different processor groups, set using SetThreadAffinityMask, we cannot // retrieve the actual affinities. // From documentation on GetProcessAffinityMask: // > If the calling process contains threads in multiple groups, // > the function returns zero for both affinity masks. // In such cases we just give up and assume we have affinity for all processors. // nullopt means no affinity is set, that is, all processors are allowed inline WindowsAffinity get_process_affinity() { HMODULE k32 = GetModuleHandle(TEXT("Kernel32.dll")); auto GetThreadSelectedCpuSetMasks_f = GetThreadSelectedCpuSetMasks_t( (void (*)()) GetProcAddress(k32, "GetThreadSelectedCpuSetMasks")); BOOL status = 0; WindowsAffinity affinity; if (GetThreadSelectedCpuSetMasks_f != nullptr) { USHORT RequiredMaskCount; status = GetThreadSelectedCpuSetMasks_f(GetCurrentThread(), nullptr, 0, &RequiredMaskCount); // We expect ERROR_INSUFFICIENT_BUFFER from GetThreadSelectedCpuSetMasks, // but other failure is an actual error. 
if (status == 0 && GetLastError() != ERROR_INSUFFICIENT_BUFFER) { affinity.isNewDeterminate = false; } else if (RequiredMaskCount > 0) { // If RequiredMaskCount then these affinities were never set, but it's // not consistent so GetProcessAffinityMask may still return some affinity. auto groupAffinities = std::make_unique(RequiredMaskCount); status = GetThreadSelectedCpuSetMasks_f(GetCurrentThread(), groupAffinities.get(), RequiredMaskCount, &RequiredMaskCount); if (status == 0) { affinity.isNewDeterminate = false; } else { std::set cpus; for (USHORT i = 0; i < RequiredMaskCount; ++i) { const size_t procGroupIndex = groupAffinities[i].Group; for (size_t j = 0; j < WIN_PROCESSOR_GROUP_SIZE; ++j) { if (groupAffinities[i].Mask & (KAFFINITY(1) << j)) cpus.insert(procGroupIndex * WIN_PROCESSOR_GROUP_SIZE + j); } } affinity.newApi = std::move(cpus); } } } // NOTE: There is no way to determine full affinity using the old API if // individual threads set affinity on different processor groups. DWORD_PTR proc, sys; status = GetProcessAffinityMask(GetCurrentProcess(), &proc, &sys); // If proc == 0 then we cannot determine affinity because it spans processor groups. // On Windows 11 and Server 2022 it will instead // > If, however, hHandle specifies a handle to the current process, the function // > always uses the calling thread's primary group (which by default is the same // > as the process' primary group) in order to set the // > lpProcessAffinityMask and lpSystemAffinityMask. // So it will never be indeterminate here. We can only make assumptions later. if (status == 0 || proc == 0) { affinity.isOldDeterminate = false; return affinity; } // If SetProcessAffinityMask was never called the affinity must span // all processor groups, but if it was called it must only span one. std::vector groupAffinity; // We need to capture this later and capturing // from structured bindings requires c++20. 
std::tie(status, groupAffinity) = get_process_group_affinity(); if (status == 0) { affinity.isOldDeterminate = false; return affinity; } if (groupAffinity.size() == 1) { // We detect the case when affinity is set to all processors and correctly // leave affinity.oldApi as nullopt. if (GetActiveProcessorGroupCount() != 1 || proc != sys) { std::set cpus; const size_t procGroupIndex = groupAffinity[0]; const uint64_t mask = static_cast(proc); for (size_t j = 0; j < WIN_PROCESSOR_GROUP_SIZE; ++j) { if (mask & (KAFFINITY(1) << j)) cpus.insert(procGroupIndex * WIN_PROCESSOR_GROUP_SIZE + j); } affinity.oldApi = std::move(cpus); } } else { // If we got here it means that either SetProcessAffinityMask was never set // or we're on Windows 11/Server 2022. // Since Windows 11 and Windows Server 2022 the behaviour of // GetProcessAffinityMask changed: // > If, however, hHandle specifies a handle to the current process, // > the function always uses the calling thread's primary group // > (which by default is the same as the process' primary group) // > in order to set the lpProcessAffinityMask and lpSystemAffinityMask. // In which case we can actually retrieve the full affinity. if (GetThreadSelectedCpuSetMasks_f != nullptr) { std::thread th([&]() { std::set cpus; bool isAffinityFull = true; for (auto procGroupIndex : groupAffinity) { const int numActiveProcessors = GetActiveProcessorCount(static_cast(procGroupIndex)); // We have to schedule to two different processors // and & the affinities we get. Otherwise our processor // choice could influence the resulting affinity. // We assume the processor IDs within the group are // filled sequentially from 0. 
uint64_t procCombined = std::numeric_limits::max(); uint64_t sysCombined = std::numeric_limits::max(); for (int i = 0; i < std::min(numActiveProcessors, 2); ++i) { GROUP_AFFINITY GroupAffinity; std::memset(&GroupAffinity, 0, sizeof(GROUP_AFFINITY)); GroupAffinity.Group = static_cast(procGroupIndex); GroupAffinity.Mask = static_cast(1) << i; status = SetThreadGroupAffinity(GetCurrentThread(), &GroupAffinity, nullptr); if (status == 0) { affinity.isOldDeterminate = false; return; } SwitchToThread(); DWORD_PTR proc2, sys2; status = GetProcessAffinityMask(GetCurrentProcess(), &proc2, &sys2); if (status == 0) { affinity.isOldDeterminate = false; return; } procCombined &= static_cast(proc2); sysCombined &= static_cast(sys2); } if (procCombined != sysCombined) isAffinityFull = false; for (size_t j = 0; j < WIN_PROCESSOR_GROUP_SIZE; ++j) { if (procCombined & (KAFFINITY(1) << j)) cpus.insert(procGroupIndex * WIN_PROCESSOR_GROUP_SIZE + j); } } // We have to detect the case where the affinity was not set, // or is set to all processors so that we correctly produce as // std::nullopt result. 
if (!isAffinityFull) { affinity.oldApi = std::move(cpus); } }); th.join(); } } return affinity; } // Type machinery used to emulate Cache->GroupCount template struct HasGroupCount: std::false_type {}; template struct HasGroupCount().Cache.GroupCount)>>: std::true_type { }; template::value, bool> = true> std::set readCacheMembers(const T* info, Pred&& is_cpu_allowed) { std::set cpus; // On Windows 10 this will read a 0 because GroupCount doesn't exist int groupCount = std::max(info->Cache.GroupCount, WORD(1)); for (WORD procGroup = 0; procGroup < groupCount; ++procGroup) { for (BYTE number = 0; number < WIN_PROCESSOR_GROUP_SIZE; ++number) { WORD groupNumber = info->Cache.GroupMasks[procGroup].Group; const CpuIndex c = static_cast(groupNumber) * WIN_PROCESSOR_GROUP_SIZE + static_cast(number); if (!(info->Cache.GroupMasks[procGroup].Mask & (1ULL << number)) || !is_cpu_allowed(c)) continue; cpus.insert(c); } } return cpus; } template::value, bool> = true> std::set readCacheMembers(const T* info, Pred&& is_cpu_allowed) { std::set cpus; for (BYTE number = 0; number < WIN_PROCESSOR_GROUP_SIZE; ++number) { WORD groupNumber = info->Cache.GroupMask.Group; const CpuIndex c = static_cast(groupNumber) * WIN_PROCESSOR_GROUP_SIZE + static_cast(number); if (!(info->Cache.GroupMask.Mask & (1ULL << number)) || !is_cpu_allowed(c)) continue; cpus.insert(c); } return cpus; } #endif #if defined(__linux__) && !defined(__ANDROID__) inline std::set get_process_affinity() { std::set cpus; // For unsupported systems, or in case of a soft error, we may assume // all processors are available for use. [[maybe_unused]] auto set_to_all_cpus = [&]() { for (CpuIndex c = 0; c < SYSTEM_THREADS_NB; ++c) cpus.insert(c); }; // cpu_set_t by default holds 1024 entries. This may not be enough soon, // but there is no easy way to determine how many threads there actually // is. In this case we just choose a reasonable upper bound. 
static constexpr CpuIndex MaxNumCpus = 1024 * 64; cpu_set_t* mask = CPU_ALLOC(MaxNumCpus); if (mask == nullptr) std::exit(EXIT_FAILURE); const size_t masksize = CPU_ALLOC_SIZE(MaxNumCpus); CPU_ZERO_S(masksize, mask); const int status = sched_getaffinity(0, masksize, mask); if (status != 0) { CPU_FREE(mask); std::exit(EXIT_FAILURE); } for (CpuIndex c = 0; c < MaxNumCpus; ++c) if (CPU_ISSET_S(c, masksize, mask)) cpus.insert(c); CPU_FREE(mask); return cpus; } #endif #if defined(__linux__) && !defined(__ANDROID__) inline static const auto STARTUP_PROCESSOR_AFFINITY = get_process_affinity(); #elif defined(_WIN64) inline static const auto STARTUP_PROCESSOR_AFFINITY = get_process_affinity(); inline static const auto STARTUP_USE_OLD_AFFINITY_API = STARTUP_PROCESSOR_AFFINITY.likely_used_old_api(); #endif // We want to abstract the purpose of storing the numa node index somewhat. // Whoever is using this does not need to know the specifics of the replication // machinery to be able to access NUMA replicated memory. class NumaReplicatedAccessToken { public: NumaReplicatedAccessToken() : n(0) {} explicit NumaReplicatedAccessToken(NumaIndex idx) : n(idx) {} NumaIndex get_numa_index() const { return n; } private: NumaIndex n; }; struct L3Domain { NumaIndex systemNumaIndex{}; std::set cpus{}; }; // Use system NUMA nodes struct SystemNumaPolicy {}; // Use system-reported L3 domains struct L3DomainsPolicy {}; // Group system-reported L3 domains until they reach bundleSize struct BundledL3Policy { size_t bundleSize; }; using NumaAutoPolicy = std::variant; // Designed as immutable, because there is no good reason to alter an already // existing config in a way that doesn't require recreating it completely, and // it would be complex and expensive to maintain class invariants. // The CPU (processor) numbers always correspond to the actual numbering used // by the system. The NUMA node numbers MAY NOT correspond to the system's // numbering of the NUMA nodes. 
In particular, by default, if the processor has // non-uniform cache access within a NUMA node (i.e., a non-unified L3 cache structure), // then L3 domains within a system NUMA node will be used to subdivide it // into multiple logical NUMA nodes in the config. Additionally, empty nodes may // be removed, or the user may create custom nodes. // // As a special case, when performing system-wide replication of read-only data // (i.e., LazyNumaReplicatedSystemWide), the system NUMA node is used, rather than // custom or L3-aware nodes. See that class's get_discriminator() function. // // It is guaranteed that NUMA nodes are NOT empty: every node exposed by NumaConfig // has at least one processor assigned. // // We use startup affinities so as not to modify its own behaviour in time. // // Since Stockfish doesn't support exceptions all places where an exception // should be thrown are replaced by std::exit. class NumaConfig { public: NumaConfig() : highestCpuIndex(0), customAffinity(false) { const auto numCpus = SYSTEM_THREADS_NB; add_cpu_range_to_node(NumaIndex{0}, CpuIndex{0}, numCpus - 1); } // This function gets a NumaConfig based on the system's provided information. // The available policies are documented above. static NumaConfig from_system([[maybe_unused]] const NumaAutoPolicy& policy, bool respectProcessAffinity = true) { NumaConfig cfg = empty(); #if !((defined(__linux__) && !defined(__ANDROID__)) || defined(_WIN64)) // Fallback for unsupported systems. for (CpuIndex c = 0; c < SYSTEM_THREADS_NB; ++c) cfg.add_cpu_to_node(NumaIndex{0}, c); #else #if defined(_WIN64) std::optional> allowedCpus; if (respectProcessAffinity) allowedCpus = STARTUP_PROCESSOR_AFFINITY.get_combined(); // The affinity cannot be determined in all cases on Windows, // but we at least guarantee that the number of allowed processors // is >= number of processors in the affinity mask. In case the user // is not satisfied they must set the processor numbers explicitly. 
auto is_cpu_allowed = [&allowedCpus](CpuIndex c) { return !allowedCpus.has_value() || allowedCpus->count(c) == 1; }; #elif defined(__linux__) && !defined(__ANDROID__) std::set allowedCpus; if (respectProcessAffinity) allowedCpus = STARTUP_PROCESSOR_AFFINITY; auto is_cpu_allowed = [respectProcessAffinity, &allowedCpus](CpuIndex c) { return !respectProcessAffinity || allowedCpus.count(c) == 1; }; #endif bool l3Success = false; if (!std::holds_alternative(policy)) { size_t l3BundleSize = 0; if (const auto* v = std::get_if(&policy)) { l3BundleSize = v->bundleSize; } if (auto l3Cfg = try_get_l3_aware_config(respectProcessAffinity, l3BundleSize, is_cpu_allowed)) { cfg = std::move(*l3Cfg); l3Success = true; } } if (!l3Success) cfg = from_system_numa(respectProcessAffinity, is_cpu_allowed); #if defined(_WIN64) // Split the NUMA nodes to be contained within a group if necessary. // This is needed between Windows 10 Build 20348 and Windows 11, because // the new NUMA allocation behaviour was introduced while there was // still no way to set thread affinity spanning multiple processor groups. // See https://learn.microsoft.com/en-us/windows/win32/procthread/numa-support // We also do this is if need to force old API for some reason. // // 2024-08-26: It appears that we need to actually always force this behaviour. // While Windows allows this to work now, such assignments have bad interaction // with the scheduler - in particular it still prefers scheduling on the thread's // "primary" node, even if it means scheduling SMT processors first. // See https://github.com/official-stockfish/Stockfish/issues/5551 // See https://learn.microsoft.com/en-us/windows/win32/procthread/processor-groups // // Each process is assigned a primary group at creation, and by default all // of its threads' primary group is the same. 
Each thread's ideal processor // is in the thread's primary group, so threads will preferentially be // scheduled to processors on their primary group, but they are able to // be scheduled to processors on any other group. // // used to be guarded by if (STARTUP_USE_OLD_AFFINITY_API) { NumaConfig splitCfg = empty(); NumaIndex splitNodeIndex = 0; for (const auto& cpus : cfg.nodes) { if (cpus.empty()) continue; size_t lastProcGroupIndex = *(cpus.begin()) / WIN_PROCESSOR_GROUP_SIZE; for (CpuIndex c : cpus) { const size_t procGroupIndex = c / WIN_PROCESSOR_GROUP_SIZE; if (procGroupIndex != lastProcGroupIndex) { splitNodeIndex += 1; lastProcGroupIndex = procGroupIndex; } splitCfg.add_cpu_to_node(splitNodeIndex, c); } splitNodeIndex += 1; } cfg = std::move(splitCfg); } #endif #endif // We have to ensure no empty NUMA nodes persist. cfg.remove_empty_numa_nodes(); // If the user explicitly opts out from respecting the current process affinity // then it may be inconsistent with the current affinity (obviously), so we // consider it custom. 
if (!respectProcessAffinity) cfg.customAffinity = true; return cfg; } // ':'-separated numa nodes // ','-separated cpu indices // supports "first-last" range syntax for cpu indices // For example "0-15,128-143:16-31,144-159:32-47,160-175:48-63,176-191" static NumaConfig from_string(const std::string& s) { NumaConfig cfg = empty(); NumaIndex n = 0; for (auto&& nodeStr : split(s, ":")) { auto indices = indices_from_shortened_string(std::string(nodeStr)); if (!indices.empty()) { for (auto idx : indices) { if (!cfg.add_cpu_to_node(n, CpuIndex(idx))) std::exit(EXIT_FAILURE); } n += 1; } } cfg.customAffinity = true; return cfg; } NumaConfig(const NumaConfig&) = delete; NumaConfig(NumaConfig&&) = default; NumaConfig& operator=(const NumaConfig&) = delete; NumaConfig& operator=(NumaConfig&&) = default; bool is_cpu_assigned(CpuIndex n) const { return nodeByCpu.count(n) == 1; } NumaIndex num_numa_nodes() const { return nodes.size(); } CpuIndex num_cpus_in_numa_node(NumaIndex n) const { assert(n < nodes.size()); return nodes[n].size(); } CpuIndex num_cpus() const { return nodeByCpu.size(); } bool requires_memory_replication() const { return customAffinity || nodes.size() > 1; } std::string to_string() const { std::string str; bool isFirstNode = true; for (auto&& cpus : nodes) { if (!isFirstNode) str += ":"; bool isFirstSet = true; auto rangeStart = cpus.begin(); for (auto it = cpus.begin(); it != cpus.end(); ++it) { auto next = std::next(it); if (next == cpus.end() || *next != *it + 1) { // cpus[i] is at the end of the range (may be of size 1) if (!isFirstSet) str += ","; const CpuIndex last = *it; if (it != rangeStart) { const CpuIndex first = *rangeStart; str += std::to_string(first); str += "-"; str += std::to_string(last); } else str += std::to_string(last); rangeStart = next; isFirstSet = false; } } isFirstNode = false; } return str; } bool suggests_binding_threads(CpuIndex numThreads) const { // If we can reasonably determine that the threads cannot be contained // by 
the OS within the first NUMA node then we advise distributing // and binding threads. When the threads are not bound we can only use // NUMA memory replicated objects from the first node, so when the OS // has to schedule on other nodes we lose performance. We also suggest // binding if there's enough threads to distribute among nodes with minimal // disparity. We try to ignore small nodes, in particular the empty ones. // If the affinity set by the user does not match the affinity given by // the OS then binding is necessary to ensure the threads are running on // correct processors. if (customAffinity) return true; // We obviously cannot distribute a single thread, so a single thread // should never be bound. if (numThreads <= 1) return false; size_t largestNodeSize = 0; for (auto&& cpus : nodes) if (cpus.size() > largestNodeSize) largestNodeSize = cpus.size(); auto is_node_small = [largestNodeSize](const std::set& node) { static constexpr double SmallNodeThreshold = 0.6; return static_cast(node.size()) / static_cast(largestNodeSize) <= SmallNodeThreshold; }; size_t numNotSmallNodes = 0; for (auto&& cpus : nodes) if (!is_node_small(cpus)) numNotSmallNodes += 1; return (numThreads > largestNodeSize / 2 || numThreads >= numNotSmallNodes * 4) && nodes.size() > 1; } std::vector distribute_threads_among_numa_nodes(CpuIndex numThreads) const { std::vector ns; if (nodes.size() == 1) { // Special case for when there's no NUMA nodes. This doesn't buy us // much, but let's keep the default path simple. ns.resize(numThreads, NumaIndex{0}); } else { std::vector occupation(nodes.size(), 0); for (CpuIndex c = 0; c < numThreads; ++c) { NumaIndex bestNode{0}; float bestNodeFill = std::numeric_limits::max(); for (NumaIndex n = 0; n < nodes.size(); ++n) { float fill = static_cast(occupation[n] + 1) / static_cast(nodes[n].size()); // NOTE: Do we want to perhaps fill the first available node // up to 50% first before considering other nodes? 
// Probably not, because it would interfere with running // multiple instances. We basically shouldn't favor any // particular node. if (fill < bestNodeFill) { bestNode = n; bestNodeFill = fill; } } ns.emplace_back(bestNode); occupation[bestNode] += 1; } } return ns; } NumaReplicatedAccessToken bind_current_thread_to_numa_node(NumaIndex n) const { if (n >= nodes.size() || nodes[n].size() == 0) std::exit(EXIT_FAILURE); #if defined(__linux__) && !defined(__ANDROID__) cpu_set_t* mask = CPU_ALLOC(highestCpuIndex + 1); if (mask == nullptr) std::exit(EXIT_FAILURE); const size_t masksize = CPU_ALLOC_SIZE(highestCpuIndex + 1); CPU_ZERO_S(masksize, mask); for (CpuIndex c : nodes[n]) CPU_SET_S(c, masksize, mask); const int status = sched_setaffinity(0, masksize, mask); CPU_FREE(mask); if (status != 0) std::exit(EXIT_FAILURE); // We yield this thread just to be sure it gets rescheduled. // This is defensive, allowed because this code is not performance critical. sched_yield(); #elif defined(_WIN64) // Requires Windows 11. No good way to set thread affinity spanning // processor groups before that. HMODULE k32 = GetModuleHandle(TEXT("Kernel32.dll")); auto SetThreadSelectedCpuSetMasks_f = SetThreadSelectedCpuSetMasks_t( (void (*)()) GetProcAddress(k32, "SetThreadSelectedCpuSetMasks")); // We ALWAYS set affinity with the new API if available, because // there's no downsides, and we forcibly keep it consistent with // the old API should we need to use it. I.e. we always keep this // as a superset of what we set with SetThreadGroupAffinity. 
if (SetThreadSelectedCpuSetMasks_f != nullptr) { // Only available on Windows 11 and Windows Server 2022 onwards const USHORT numProcGroups = USHORT( ((highestCpuIndex + 1) + WIN_PROCESSOR_GROUP_SIZE - 1) / WIN_PROCESSOR_GROUP_SIZE); auto groupAffinities = std::make_unique(numProcGroups); std::memset(groupAffinities.get(), 0, sizeof(GROUP_AFFINITY) * numProcGroups); for (WORD i = 0; i < numProcGroups; ++i) groupAffinities[i].Group = i; for (CpuIndex c : nodes[n]) { const size_t procGroupIndex = c / WIN_PROCESSOR_GROUP_SIZE; const size_t idxWithinProcGroup = c % WIN_PROCESSOR_GROUP_SIZE; groupAffinities[procGroupIndex].Mask |= KAFFINITY(1) << idxWithinProcGroup; } HANDLE hThread = GetCurrentThread(); const BOOL status = SetThreadSelectedCpuSetMasks_f(hThread, groupAffinities.get(), numProcGroups); if (status == 0) std::exit(EXIT_FAILURE); // We yield this thread just to be sure it gets rescheduled. // This is defensive, allowed because this code is not performance critical. SwitchToThread(); } // Sometimes we need to force the old API, but do not use it unless necessary. if (SetThreadSelectedCpuSetMasks_f == nullptr || STARTUP_USE_OLD_AFFINITY_API) { // On earlier windows version (since windows 7) we cannot run a single thread // on multiple processor groups, so we need to restrict the group. // We assume the group of the first processor listed for this node. // Processors from outside this group will not be assigned for this thread. // Normally this won't be an issue because windows used to assign NUMA nodes // such that they cannot span processor groups. However, since Windows 10 // Build 20348 the behaviour changed, so there's a small window of versions // between this and Windows 11 that might exhibit problems with not all // processors being utilized. // // We handle this in NumaConfig::from_system by manually splitting the // nodes when we detect that there is no function to set affinity spanning // processor nodes. 
This is required because otherwise our thread distribution // code may produce suboptimal results. // // See https://learn.microsoft.com/en-us/windows/win32/procthread/numa-support GROUP_AFFINITY affinity; std::memset(&affinity, 0, sizeof(GROUP_AFFINITY)); // We use an ordered set to be sure to get the smallest cpu number here. const size_t forcedProcGroupIndex = *(nodes[n].begin()) / WIN_PROCESSOR_GROUP_SIZE; affinity.Group = static_cast(forcedProcGroupIndex); for (CpuIndex c : nodes[n]) { const size_t procGroupIndex = c / WIN_PROCESSOR_GROUP_SIZE; const size_t idxWithinProcGroup = c % WIN_PROCESSOR_GROUP_SIZE; // We skip processors that are not in the same processor group. // If everything was set up correctly this will never be an issue, // but we have to account for bad NUMA node specification. if (procGroupIndex != forcedProcGroupIndex) continue; affinity.Mask |= KAFFINITY(1) << idxWithinProcGroup; } HANDLE hThread = GetCurrentThread(); const BOOL status = SetThreadGroupAffinity(hThread, &affinity, nullptr); if (status == 0) std::exit(EXIT_FAILURE); // We yield this thread just to be sure it gets rescheduled. This is // defensive, allowed because this code is not performance critical. SwitchToThread(); } #endif return NumaReplicatedAccessToken(n); } template void execute_on_numa_node(NumaIndex n, FuncT&& f) const { std::thread th([this, &f, n]() { bind_current_thread_to_numa_node(n); std::forward(f)(); }); th.join(); } std::vector> nodes; std::map nodeByCpu; private: CpuIndex highestCpuIndex; bool customAffinity; static NumaConfig empty() { return NumaConfig(EmptyNodeTag{}); } struct EmptyNodeTag {}; NumaConfig(EmptyNodeTag) : highestCpuIndex(0), customAffinity(false) {} void remove_empty_numa_nodes() { std::vector> newNodes; for (auto&& cpus : nodes) if (!cpus.empty()) newNodes.emplace_back(std::move(cpus)); nodes = std::move(newNodes); } // Returns true if successful // Returns false if failed, i.e. 
when the cpu is already present // strong guarantee, the structure remains unmodified bool add_cpu_to_node(NumaIndex n, CpuIndex c) { if (is_cpu_assigned(c)) return false; while (nodes.size() <= n) nodes.emplace_back(); nodes[n].insert(c); nodeByCpu[c] = n; if (c > highestCpuIndex) highestCpuIndex = c; return true; } // Returns true if successful // Returns false if failed, i.e. when any of the cpus is already present // strong guarantee, the structure remains unmodified bool add_cpu_range_to_node(NumaIndex n, CpuIndex cfirst, CpuIndex clast) { for (CpuIndex c = cfirst; c <= clast; ++c) if (is_cpu_assigned(c)) return false; while (nodes.size() <= n) nodes.emplace_back(); for (CpuIndex c = cfirst; c <= clast; ++c) { nodes[n].insert(c); nodeByCpu[c] = n; } if (clast > highestCpuIndex) highestCpuIndex = clast; return true; } static std::vector indices_from_shortened_string(const std::string& s) { std::vector indices; if (s.empty()) return indices; for (const auto& ss : split(s, ",")) { if (ss.empty()) continue; auto parts = split(ss, "-"); if (parts.size() == 1) { const CpuIndex c = CpuIndex{str_to_size_t(std::string(parts[0]))}; indices.emplace_back(c); } else if (parts.size() == 2) { const CpuIndex cfirst = CpuIndex{str_to_size_t(std::string(parts[0]))}; const CpuIndex clast = CpuIndex{str_to_size_t(std::string(parts[1]))}; for (size_t c = cfirst; c <= clast; ++c) { indices.emplace_back(c); } } } return indices; } // This function queries the system for the mapping of processors to NUMA nodes. // On Linux we read from standardized kernel sysfs, with a fallback to single NUMA // node. On Windows we utilize GetNumaProcessorNodeEx, which has its quirks, see // comment for Windows implementation of get_process_affinity. 
template static NumaConfig from_system_numa([[maybe_unused]] bool respectProcessAffinity, [[maybe_unused]] Pred&& is_cpu_allowed) { NumaConfig cfg = empty(); #if defined(__linux__) && !defined(__ANDROID__) // On Linux things are straightforward, since there's no processor groups and // any thread can be scheduled on all processors. // We try to gather this information from the sysfs first // https://www.kernel.org/doc/Documentation/ABI/stable/sysfs-devices-node bool useFallback = false; auto fallback = [&]() { useFallback = true; cfg = empty(); }; // /sys/devices/system/node/online contains information about active NUMA nodes auto nodeIdsStr = read_file_to_string("/sys/devices/system/node/online"); if (!nodeIdsStr.has_value() || nodeIdsStr->empty()) { fallback(); } else { remove_whitespace(*nodeIdsStr); for (size_t n : indices_from_shortened_string(*nodeIdsStr)) { // /sys/devices/system/node/node.../cpulist std::string path = std::string("/sys/devices/system/node/node") + std::to_string(n) + "/cpulist"; auto cpuIdsStr = read_file_to_string(path); // Now, we only bail if the file does not exist. Some nodes may be // empty, that's fine. An empty node still has a file that appears // to have some whitespace, so we need to handle that. 
if (!cpuIdsStr.has_value()) { fallback(); break; } else { remove_whitespace(*cpuIdsStr); for (size_t c : indices_from_shortened_string(*cpuIdsStr)) { if (is_cpu_allowed(c)) cfg.add_cpu_to_node(n, c); } } } } if (useFallback) { for (CpuIndex c = 0; c < SYSTEM_THREADS_NB; ++c) if (is_cpu_allowed(c)) cfg.add_cpu_to_node(NumaIndex{0}, c); } #elif defined(_WIN64) WORD numProcGroups = GetActiveProcessorGroupCount(); for (WORD procGroup = 0; procGroup < numProcGroups; ++procGroup) { for (BYTE number = 0; number < WIN_PROCESSOR_GROUP_SIZE; ++number) { PROCESSOR_NUMBER procnum; procnum.Group = procGroup; procnum.Number = number; procnum.Reserved = 0; USHORT nodeNumber; const BOOL status = GetNumaProcessorNodeEx(&procnum, &nodeNumber); const CpuIndex c = static_cast(procGroup) * WIN_PROCESSOR_GROUP_SIZE + static_cast(number); if (status != 0 && nodeNumber != std::numeric_limits::max() && is_cpu_allowed(c)) { cfg.add_cpu_to_node(nodeNumber, c); } } } #else abort(); // should not reach here #endif return cfg; } template static std::optional try_get_l3_aware_config( bool respectProcessAffinity, size_t bundleSize, [[maybe_unused]] Pred&& is_cpu_allowed) { // Get the normal system configuration so we know to which NUMA node // each L3 domain belongs. 
NumaConfig systemConfig = NumaConfig::from_system(SystemNumaPolicy{}, respectProcessAffinity); std::vector l3Domains; #if defined(__linux__) && !defined(__ANDROID__) std::set seenCpus; auto nextUnseenCpu = [&seenCpus]() { for (CpuIndex i = 0;; ++i) if (!seenCpus.count(i)) return i; }; while (true) { CpuIndex next = nextUnseenCpu(); auto siblingsStr = read_file_to_string("/sys/devices/system/cpu/cpu" + std::to_string(next) + "/cache/index3/shared_cpu_list"); if (!siblingsStr.has_value() || siblingsStr->empty()) { break; // we have read all available CPUs } L3Domain domain; for (size_t c : indices_from_shortened_string(*siblingsStr)) { if (is_cpu_allowed(c)) { domain.systemNumaIndex = systemConfig.nodeByCpu.at(c); domain.cpus.insert(c); } seenCpus.insert(c); } if (!domain.cpus.empty()) { l3Domains.emplace_back(std::move(domain)); } } #elif defined(_WIN64) DWORD bufSize = 0; GetLogicalProcessorInformationEx(RelationCache, nullptr, &bufSize); if (GetLastError() != ERROR_INSUFFICIENT_BUFFER) return std::nullopt; std::vector buffer(bufSize); auto info = reinterpret_cast(buffer.data()); if (!GetLogicalProcessorInformationEx(RelationCache, info, &bufSize)) return std::nullopt; while (reinterpret_cast(info) < buffer.data() + bufSize) { info = std::launder(info); if (info->Relationship == RelationCache && info->Cache.Level == 3) { L3Domain domain{}; domain.cpus = readCacheMembers(info, is_cpu_allowed); if (!domain.cpus.empty()) { domain.systemNumaIndex = systemConfig.nodeByCpu.at(*domain.cpus.begin()); l3Domains.push_back(std::move(domain)); } } // Variable length data structure, advance to next info = reinterpret_cast( reinterpret_cast(info) + info->Size); } #endif if (!l3Domains.empty()) return {NumaConfig::from_l3_info(std::move(l3Domains), bundleSize)}; return std::nullopt; } static NumaConfig from_l3_info(std::vector&& domains, size_t bundleSize) { assert(!domains.empty()); std::map> list; for (auto& d : domains) list[d.systemNumaIndex].emplace_back(std::move(d)); 
NumaConfig cfg = empty(); NumaIndex n = 0; for (auto& [_, ds] : list) { bool changed; // Scan through pairs and merge them. With roughly equal L3 sizes, should give // a decent distribution. do { changed = false; for (size_t j = 0; j + 1 < ds.size(); ++j) { if (ds[j].cpus.size() + ds[j + 1].cpus.size() <= bundleSize) { changed = true; ds[j].cpus.merge(ds[j + 1].cpus); ds.erase(ds.begin() + j + 1); } } // ds.size() has decreased if changed is true, so this loop will terminate } while (changed); for (const L3Domain& d : ds) { const NumaIndex dn = n++; for (CpuIndex cpu : d.cpus) { cfg.add_cpu_to_node(dn, cpu); } } } return cfg; } }; class NumaReplicationContext; // Instances of this class are tracked by the NumaReplicationContext instance. // NumaReplicationContext informs all tracked instances when NUMA configuration changes. class NumaReplicatedBase { public: NumaReplicatedBase(NumaReplicationContext& ctx); NumaReplicatedBase(const NumaReplicatedBase&) = delete; NumaReplicatedBase(NumaReplicatedBase&& other) noexcept; NumaReplicatedBase& operator=(const NumaReplicatedBase&) = delete; NumaReplicatedBase& operator=(NumaReplicatedBase&& other) noexcept; virtual void on_numa_config_changed() = 0; virtual ~NumaReplicatedBase(); const NumaConfig& get_numa_config() const; private: NumaReplicationContext* context; }; // We force boxing with a unique_ptr. If this becomes an issue due to added // indirection we may need to add an option for a custom boxing type. When the // NUMA config changes the value stored at the index 0 is replicated to other nodes. 
template class NumaReplicated: public NumaReplicatedBase { public: using ReplicatorFuncType = std::function; NumaReplicated(NumaReplicationContext& ctx) : NumaReplicatedBase(ctx) { replicate_from(T{}); } NumaReplicated(NumaReplicationContext& ctx, T&& source) : NumaReplicatedBase(ctx) { replicate_from(std::move(source)); } NumaReplicated(const NumaReplicated&) = delete; NumaReplicated(NumaReplicated&& other) noexcept : NumaReplicatedBase(std::move(other)), instances(std::exchange(other.instances, {})) {} NumaReplicated& operator=(const NumaReplicated&) = delete; NumaReplicated& operator=(NumaReplicated&& other) noexcept { NumaReplicatedBase::operator=(*this, std::move(other)); instances = std::exchange(other.instances, {}); return *this; } NumaReplicated& operator=(T&& source) { replicate_from(std::move(source)); return *this; } ~NumaReplicated() override = default; const T& operator[](NumaReplicatedAccessToken token) const { assert(token.get_numa_index() < instances.size()); return *(instances[token.get_numa_index()]); } const T& operator*() const { return *(instances[0]); } const T* operator->() const { return instances[0].get(); } template void modify_and_replicate(FuncT&& f) { auto source = std::move(instances[0]); std::forward(f)(*source); replicate_from(std::move(*source)); } void on_numa_config_changed() override { // Use the first one as the source. It doesn't matter which one we use, // because they all must be identical, but the first one is guaranteed to exist. 
auto source = std::move(instances[0]); replicate_from(std::move(*source)); } private: std::vector> instances; void replicate_from(T&& source) { instances.clear(); const NumaConfig& cfg = get_numa_config(); if (cfg.requires_memory_replication()) { for (NumaIndex n = 0; n < cfg.num_numa_nodes(); ++n) { cfg.execute_on_numa_node( n, [this, &source]() { instances.emplace_back(std::make_unique(source)); }); } } else { assert(cfg.num_numa_nodes() == 1); // We take advantage of the fact that replication is not required // and reuse the source value, avoiding one copy operation. instances.emplace_back(std::make_unique(std::move(source))); } } }; // We force boxing with a unique_ptr. If this becomes an issue due to added // indirection we may need to add an option for a custom boxing type. template class LazyNumaReplicated: public NumaReplicatedBase { public: using ReplicatorFuncType = std::function; LazyNumaReplicated(NumaReplicationContext& ctx) : NumaReplicatedBase(ctx) { prepare_replicate_from(T{}); } LazyNumaReplicated(NumaReplicationContext& ctx, T&& source) : NumaReplicatedBase(ctx) { prepare_replicate_from(std::move(source)); } LazyNumaReplicated(const LazyNumaReplicated&) = delete; LazyNumaReplicated(LazyNumaReplicated&& other) noexcept : NumaReplicatedBase(std::move(other)), instances(std::exchange(other.instances, {})) {} LazyNumaReplicated& operator=(const LazyNumaReplicated&) = delete; LazyNumaReplicated& operator=(LazyNumaReplicated&& other) noexcept { NumaReplicatedBase::operator=(*this, std::move(other)); instances = std::exchange(other.instances, {}); return *this; } LazyNumaReplicated& operator=(T&& source) { prepare_replicate_from(std::move(source)); return *this; } ~LazyNumaReplicated() override = default; const T& operator[](NumaReplicatedAccessToken token) const { assert(token.get_numa_index() < instances.size()); ensure_present(token.get_numa_index()); return *(instances[token.get_numa_index()]); } const T& operator*() const { return *(instances[0]); } 
const T* operator->() const { return instances[0].get(); } template void modify_and_replicate(FuncT&& f) { auto source = std::move(instances[0]); std::forward(f)(*source); prepare_replicate_from(std::move(*source)); } void on_numa_config_changed() override { // Use the first one as the source. It doesn't matter which one we use, // because they all must be identical, but the first one is guaranteed to exist. auto source = std::move(instances[0]); prepare_replicate_from(std::move(*source)); } private: mutable std::vector> instances; mutable std::mutex mutex; void ensure_present(NumaIndex idx) const { assert(idx < instances.size()); if (instances[idx] != nullptr) return; assert(idx != 0); std::unique_lock lock(mutex); // Check again for races. if (instances[idx] != nullptr) return; const NumaConfig& cfg = get_numa_config(); cfg.execute_on_numa_node( idx, [this, idx]() { instances[idx] = std::make_unique(*instances[0]); }); } void prepare_replicate_from(T&& source) { instances.clear(); const NumaConfig& cfg = get_numa_config(); if (cfg.requires_memory_replication()) { assert(cfg.num_numa_nodes() > 0); // We just need to make sure the first instance is there. // Note that we cannot move here as we need to reallocate the data // on the correct NUMA node. cfg.execute_on_numa_node( 0, [this, &source]() { instances.emplace_back(std::make_unique(source)); }); // Prepare others for lazy init. instances.resize(cfg.num_numa_nodes()); } else { assert(cfg.num_numa_nodes() == 1); // We take advantage of the fact that replication is not required // and reuse the source value, avoiding one copy operation. instances.emplace_back(std::make_unique(std::move(source))); } } }; // Utilizes shared memory. 
template class LazyNumaReplicatedSystemWide: public NumaReplicatedBase { public: using ReplicatorFuncType = std::function; LazyNumaReplicatedSystemWide(NumaReplicationContext& ctx) : NumaReplicatedBase(ctx) { prepare_replicate_from(std::make_unique()); } LazyNumaReplicatedSystemWide(NumaReplicationContext& ctx, std::unique_ptr&& source) : NumaReplicatedBase(ctx) { prepare_replicate_from(std::move(source)); } LazyNumaReplicatedSystemWide(const LazyNumaReplicatedSystemWide&) = delete; LazyNumaReplicatedSystemWide(LazyNumaReplicatedSystemWide&& other) noexcept : NumaReplicatedBase(std::move(other)), instances(std::exchange(other.instances, {})) {} LazyNumaReplicatedSystemWide& operator=(const LazyNumaReplicatedSystemWide&) = delete; LazyNumaReplicatedSystemWide& operator=(LazyNumaReplicatedSystemWide&& other) noexcept { NumaReplicatedBase::operator=(*this, std::move(other)); instances = std::exchange(other.instances, {}); return *this; } LazyNumaReplicatedSystemWide& operator=(std::unique_ptr&& source) { prepare_replicate_from(std::move(source)); return *this; } ~LazyNumaReplicatedSystemWide() override = default; const T& operator[](NumaReplicatedAccessToken token) const { assert(token.get_numa_index() < instances.size()); ensure_present(token.get_numa_index()); return *(instances[token.get_numa_index()]); } const T& operator*() const { return *(instances[0]); } const T* operator->() const { return &*instances[0]; } std::vector>> get_status_and_errors() const { std::vector>> status; status.reserve(instances.size()); for (const auto& instance : instances) { status.emplace_back(instance.get_status(), instance.get_error_message()); } return status; } template void modify_and_replicate(FuncT&& f) { auto source = std::make_unique(*instances[0]); std::forward(f)(*source); prepare_replicate_from(std::move(source)); } void on_numa_config_changed() override { // Use the first one as the source. 
It doesn't matter which one we use, // because they all must be identical, but the first one is guaranteed to exist. auto source = std::make_unique(*instances[0]); prepare_replicate_from(std::move(source)); } private: mutable std::vector> instances; mutable std::mutex mutex; std::size_t get_discriminator(NumaIndex idx) const { const NumaConfig& cfg = get_numa_config(); const NumaConfig& cfg_sys = NumaConfig::from_system(SystemNumaPolicy{}, false); // as a discriminator, locate the hardware/system numadomain this cpuindex belongs to CpuIndex cpu = *cfg.nodes[idx].begin(); // get a CpuIndex from NumaIndex NumaIndex sys_idx = cfg_sys.is_cpu_assigned(cpu) ? cfg_sys.nodeByCpu.at(cpu) : 0; std::string s = cfg_sys.to_string() + "$" + std::to_string(sys_idx); return static_cast(hash_string(s)); } void ensure_present(NumaIndex idx) const { assert(idx < instances.size()); if (instances[idx] != nullptr) return; assert(idx != 0); std::unique_lock lock(mutex); // Check again for races. if (instances[idx] != nullptr) return; const NumaConfig& cfg = get_numa_config(); cfg.execute_on_numa_node(idx, [this, idx]() { instances[idx] = SystemWideSharedConstant(*instances[0], get_discriminator(idx)); }); } void prepare_replicate_from(std::unique_ptr&& source) { instances.clear(); const NumaConfig& cfg = get_numa_config(); // We just need to make sure the first instance is there. // Note that we cannot move here as we need to reallocate the data // on the correct NUMA node. // Even in the case of a single NUMA node we have to copy since it's shared memory. if (cfg.requires_memory_replication()) { assert(cfg.num_numa_nodes() > 0); cfg.execute_on_numa_node(0, [this, &source]() { instances.emplace_back(SystemWideSharedConstant(*source, get_discriminator(0))); }); // Prepare others for lazy init. 
instances.resize(cfg.num_numa_nodes()); } else { assert(cfg.num_numa_nodes() == 1); instances.emplace_back(SystemWideSharedConstant(*source, get_discriminator(0))); } } }; class NumaReplicationContext { public: NumaReplicationContext(NumaConfig&& cfg) : config(std::move(cfg)) {} NumaReplicationContext(const NumaReplicationContext&) = delete; NumaReplicationContext(NumaReplicationContext&&) = delete; NumaReplicationContext& operator=(const NumaReplicationContext&) = delete; NumaReplicationContext& operator=(NumaReplicationContext&&) = delete; ~NumaReplicationContext() { // The context must outlive replicated objects if (!trackedReplicatedObjects.empty()) std::exit(EXIT_FAILURE); } void attach(NumaReplicatedBase* obj) { assert(trackedReplicatedObjects.count(obj) == 0); trackedReplicatedObjects.insert(obj); } void detach(NumaReplicatedBase* obj) { assert(trackedReplicatedObjects.count(obj) == 1); trackedReplicatedObjects.erase(obj); } // oldObj may be invalid at this point void move_attached([[maybe_unused]] NumaReplicatedBase* oldObj, NumaReplicatedBase* newObj) { assert(trackedReplicatedObjects.count(oldObj) == 1); assert(trackedReplicatedObjects.count(newObj) == 0); trackedReplicatedObjects.erase(oldObj); trackedReplicatedObjects.insert(newObj); } void set_numa_config(NumaConfig&& cfg) { config = std::move(cfg); for (auto&& obj : trackedReplicatedObjects) obj->on_numa_config_changed(); } const NumaConfig& get_numa_config() const { return config; } private: NumaConfig config; // std::set uses std::less by default, which is required for pointer comparison std::set trackedReplicatedObjects; }; inline NumaReplicatedBase::NumaReplicatedBase(NumaReplicationContext& ctx) : context(&ctx) { context->attach(this); } inline NumaReplicatedBase::NumaReplicatedBase(NumaReplicatedBase&& other) noexcept : context(std::exchange(other.context, nullptr)) { context->move_attached(&other, this); } inline NumaReplicatedBase& NumaReplicatedBase::operator=(NumaReplicatedBase&& other) 
noexcept { context = std::exchange(other.context, nullptr); context->move_attached(&other, this); return *this; } inline NumaReplicatedBase::~NumaReplicatedBase() { if (context != nullptr) context->detach(this); } inline const NumaConfig& NumaReplicatedBase::get_numa_config() const { return context->get_numa_config(); } } // namespace Stockfish #endif // #ifndef NUMA_H_INCLUDED ================================================ FILE: src/perft.h ================================================ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. Stockfish is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #ifndef PERFT_H_INCLUDED #define PERFT_H_INCLUDED #include #include "movegen.h" #include "position.h" #include "types.h" #include "uci.h" namespace Stockfish::Benchmark { // Utility to verify move generation. All the leaf nodes up // to the given depth are generated and counted, and the sum is returned. template uint64_t perft(Position& pos, Depth depth) { StateInfo st; uint64_t cnt, nodes = 0; const bool leaf = (depth == 2); for (const auto& m : MoveList(pos)) { if (Root && depth <= 1) cnt = 1, nodes++; else { pos.do_move(m, st); cnt = leaf ? 
MoveList(pos).size() : perft(pos, depth - 1); nodes += cnt; pos.undo_move(m); } if (Root) sync_cout << UCIEngine::move(m, pos.is_chess960()) << ": " << cnt << sync_endl; } return nodes; } inline uint64_t perft(const std::string& fen, Depth depth, bool isChess960) { StateInfo st; Position p; p.set(fen, isChess960, &st); return perft(p, depth); } } #endif // PERFT_H_INCLUDED ================================================ FILE: src/position.cpp ================================================ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. Stockfish is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . 
*/ #include "position.h" #include #include #include #include #include #include #include #include #include #include #include #include #include "bitboard.h" #include "history.h" #include "misc.h" #include "movegen.h" #include "syzygy/tbprobe.h" #include "tt.h" #include "uci.h" using std::string; namespace Stockfish { namespace Zobrist { Key psq[PIECE_NB][SQUARE_NB]; Key enpassant[FILE_NB]; Key castling[CASTLING_RIGHT_NB]; Key side, noPawns; } namespace { constexpr std::string_view PieceToChar(" PNBRQK pnbrqk"); static constexpr Piece Pieces[] = {W_PAWN, W_KNIGHT, W_BISHOP, W_ROOK, W_QUEEN, W_KING, B_PAWN, B_KNIGHT, B_BISHOP, B_ROOK, B_QUEEN, B_KING}; } // namespace // Returns an ASCII representation of the position std::ostream& operator<<(std::ostream& os, const Position& pos) { os << "\n +---+---+---+---+---+---+---+---+\n"; for (Rank r = RANK_8;; --r) { for (File f = FILE_A; f <= FILE_H; ++f) os << " | " << PieceToChar[pos.piece_on(make_square(f, r))]; os << " | " << (1 + r) << "\n +---+---+---+---+---+---+---+---+\n"; if (r == RANK_1) break; } os << " a b c d e f g h\n" << "\nFen: " << pos.fen() << "\nKey: " << std::hex << std::uppercase << std::setfill('0') << std::setw(16) << pos.key() << std::setfill(' ') << std::dec << "\nCheckers: "; for (Bitboard b = pos.checkers(); b;) os << UCIEngine::square(pop_lsb(b)) << " "; if (Tablebases::MaxCardinality >= popcount(pos.pieces()) && !pos.can_castle(ANY_CASTLING)) { StateInfo st; Position p; p.set(pos.fen(), pos.is_chess960(), &st); Tablebases::ProbeState s1, s2; Tablebases::WDLScore wdl = Tablebases::probe_wdl(p, &s1); int dtz = Tablebases::probe_dtz(p, &s2); os << "\nTablebases WDL: " << std::setw(4) << wdl << " (" << s1 << ")" << "\nTablebases DTZ: " << std::setw(4) << dtz << " (" << s2 << ")"; } return os; } // Implements Marcel van Kervinck's cuckoo algorithm to detect repetition of positions // for 3-fold repetition draws. 
The algorithm uses two hash tables with Zobrist hashes // to allow fast detection of recurring positions. For details see: // http://web.archive.org/web/20201107002606/https://marcelk.net/2013-04-06/paper/upcoming-rep-v2.pdf // First and second hash functions for indexing the cuckoo tables inline int H1(Key h) { return h & 0x1fff; } inline int H2(Key h) { return (h >> 16) & 0x1fff; } // Cuckoo tables with Zobrist hashes of valid reversible moves, and the moves themselves std::array cuckoo; std::array cuckooMove; // Initializes at startup the various arrays used to compute hash keys void Position::init() { PRNG rng(1070372); for (Piece pc : Pieces) for (Square s = SQ_A1; s <= SQ_H8; ++s) Zobrist::psq[pc][s] = rng.rand(); // pawns on these squares will promote std::fill_n(Zobrist::psq[W_PAWN] + SQ_A8, 8, 0); std::fill_n(Zobrist::psq[B_PAWN], 8, 0); for (File f = FILE_A; f <= FILE_H; ++f) Zobrist::enpassant[f] = rng.rand(); for (int cr = NO_CASTLING; cr <= ANY_CASTLING; ++cr) Zobrist::castling[cr] = rng.rand(); Zobrist::side = rng.rand(); Zobrist::noPawns = rng.rand(); // Prepare the cuckoo tables cuckoo.fill(0); cuckooMove.fill(Move::none()); [[maybe_unused]] int count = 0; for (Piece pc : Pieces) for (Square s1 = SQ_A1; s1 <= SQ_H8; ++s1) for (Square s2 = Square(s1 + 1); s2 <= SQ_H8; ++s2) if ((type_of(pc) != PAWN) && (attacks_bb(type_of(pc), s1, 0) & s2)) { Move move = Move(s1, s2); Key key = Zobrist::psq[pc][s1] ^ Zobrist::psq[pc][s2] ^ Zobrist::side; int i = H1(key); while (true) { std::swap(cuckoo[i], key); std::swap(cuckooMove[i], move); if (move == Move::none()) // Arrived at empty slot? break; i = (i == H1(key)) ? H2(key) : H1(key); // Push victim to alternative slot } count++; } assert(count == 3668); } // Initializes the position object with the given FEN string. // The FEN string is strictly validated; if it is invalid or inconsistent, // a PositionSetError describing the problem is returned, otherwise std::nullopt. 
std::optional Position::set(const string& fenStr, bool isChess960, StateInfo* si) { /* A FEN string defines a particular position using only the ASCII character set. A FEN string contains six fields separated by a space. The fields are: 1) Piece placement (from white's perspective). Each rank is described, starting with rank 8 and ending with rank 1. Within each rank, the contents of each square are described from file A through file H. Following the Standard Algebraic Notation (SAN), each piece is identified by a single letter taken from the standard English names. White pieces are designated using upper-case letters ("PNBRQK") whilst Black uses lowercase ("pnbrqk"). Blank squares are noted using digits 1 through 8 (the number of blank squares), and "/" separates ranks. 2) Active color. "w" means white moves next, "b" means black. 3) Castling availability. If neither side can castle, this is "-". Otherwise, this has one or more letters: "K" (White can castle kingside), "Q" (White can castle queenside), "k" (Black can castle kingside), and/or "q" (Black can castle queenside). 4) En passant target square (in algebraic notation). If there's no en passant target square, this is "-". If a pawn has just made a 2-square move, this is the position "behind" the pawn. Following X-FEN standard, this is recorded only if there is a pawn in position to make an en passant capture, and if there really is a pawn that might have advanced two squares. 5) Halfmove clock. This is the number of halfmoves since the last pawn advance or capture. This is used to determine if a draw can be claimed under the fifty-move rule. 6) Fullmove number. The number of the full move. It starts at 1, and is incremented after Black's move. */ unsigned char token; std::istringstream ss(fenStr); std::memset(reinterpret_cast(this), 0, sizeof(Position)); std::memset(si, 0, sizeof(StateInfo)); st = si; ss >> std::noskipws; int numPieces = 0; int file = FILE_A; int rank = RANK_8; // 1. 
Piece placement for (;;) { if (!(ss >> token)) return PositionSetError("Invalid FEN. Unexpected end of stream."); if (isspace(token)) break; if (isdigit(token)) { const int diff = (token - '0'); if (diff < 1 || diff > 8) return PositionSetError("Invalid FEN. Invalid number of squares to skip."); file += diff; if (file > FILE_NB) return PositionSetError("Invalid FEN. Invalid file reached."); } else if (token == '/') { if (file != FILE_NB) return PositionSetError( "Invalid FEN. Trying to end rank when not at the end of it."); --rank; file = FILE_A; if (rank < RANK_1) return PositionSetError("Invalid FEN. Invalid rank reached."); } else { if (file >= FILE_NB) return PositionSetError("Invalid FEN. Invalid file reached."); const size_t idx = PieceToChar.find(token); if (idx == string::npos) return PositionSetError(std::string("Invalid FEN. Invalid piece: ") + std::string(1, token)); if (++numPieces > 32) return PositionSetError("Invalid FEN. More than 32 pieces on the board."); const Square sq = make_square(File(file), Rank(rank)); put_piece(Piece(idx), sq); ++file; } } if (rank != RANK_1 || file != FILE_NB) return PositionSetError("Invalid FEN. Board state encoding ended but cursor not at end."); if (pieces(PAWN) & (RANK_1 | RANK_8)) return PositionSetError("Unsupported position. Pawns on the first or eighth rank."); if (count(WHITE) != 1 || count(BLACK) != 1) return PositionSetError("Unsupported position. Incorrect number of kings."); const int wPawns = count(WHITE); const int bPawns = count(BLACK); if (wPawns > 8) return PositionSetError("Unsupported position. WHITE has more than 8 pawns."); if (bPawns > 8) return PositionSetError("Unsupported position. 
BLACK has more than 8 pawns."); const int wAdditionalKnights = std::max((int) count(WHITE) - 2, 0); const int bAdditionalKnights = std::max((int) count(BLACK) - 2, 0); const int wAdditionalBishops = std::max((int) count(WHITE) - 2, 0); const int bAdditionalBishops = std::max((int) count(BLACK) - 2, 0); const int wAdditionalRooks = std::max((int) count(WHITE) - 2, 0); const int bAdditionalRooks = std::max((int) count(BLACK) - 2, 0); const int wAdditionalQueens = std::max((int) count(WHITE) - 1, 0); const int bAdditionalQueens = std::max((int) count(BLACK) - 1, 0); if (wAdditionalKnights + wAdditionalBishops + wAdditionalRooks + wAdditionalQueens > 8 - wPawns) return PositionSetError("Unsupported position. Too many major pieces for WHITE."); if (bAdditionalKnights + bAdditionalBishops + bAdditionalRooks + bAdditionalQueens > 8 - bPawns) return PositionSetError("Unsupported position. Too many major pieces for BLACK."); // 2. Active color if (!(ss >> token)) return PositionSetError("Invalid FEN. Unexpected end of stream."); if (token != 'w' && token != 'b') return PositionSetError(std::string("Invalid FEN. Invalid side to move: ") + std::string(1, token)); sideToMove = (token == 'w' ? WHITE : BLACK); if (!(ss >> token) || !isspace(token) || ss.eof()) return PositionSetError("Invalid FEN. Expected whitespace after side to move."); // 3. Castling availability. Compatible with 3 standards: Normal FEN standard, // Shredder-FEN that uses the letters of the columns on which the rooks began // the game instead of KQkq and also X-FEN standard that, in case of Chess960, // if an inner rook is associated with the castling right, the castling tag is // replaced by the file letter of the involved rook, as for the Shredder-FEN. // // NOTE: Due to the prevalnce of incorrect (or missing) castling rights the // validation is less strict. However, incorrect castling rights are still sanitized. 
int num_castling_rights = 0; for (;;) { if (!(ss >> token)) break; if (isspace(token)) break; if (num_castling_rights == 0 && token == '-') { ss >> std::ws; break; } if (++num_castling_rights > 4) return PositionSetError("Invalid FEN. Maximum of 4 castling rights can be specified."); Square rsq = SQ_NONE; Square ksq = SQ_NONE; Color c = islower(token) ? BLACK : WHITE; Piece rook = make_piece(c, ROOK); Piece king = make_piece(c, KING); token = char(toupper(token)); if (token == 'K' || token == 'Q') { const int dir = token == 'K' ? -1 : 1; Square sq = relative_square(c, token == 'K' ? SQ_H1 : SQ_A1); // Look for a rook and a king for the castling. King must come later. // Only the first rook is noted. // If the castling rights are available the king must always be between files 2 and 7 inclusive // so there is no need to check the last square. for (int i = 0; i < 7; ++i, sq = Square(sq + dir)) { const Piece pc = piece_on(sq); if (pc == king) { ksq = sq; break; } else if (pc == rook && rsq == SQ_NONE) { rsq = sq; } } } else if (token >= 'A' && token <= 'H') { const Square rsqCandidate = make_square(File(token - 'A'), relative_rank(c, RANK_1)); ; if (piece_on(rsqCandidate) == rook) rsq = rsqCandidate; // If the castling rights are available the king must always be between files 2 and 7 inclusive. Square sq = relative_square(c, SQ_B1); for (int i = 0; i < 6; ++i, ++sq) { if (piece_on(sq) == king) ksq = sq; } } else { return PositionSetError(std::string("Invalid FEN. Expected castling rights. Got: ") + std::string(1, token)); } // Only apply castling rights if they can be valid. if (ksq != SQ_NONE && rsq != SQ_NONE) set_castling_right(c, rsq); } // 4. En passant square. // Ignore if square is invalid or not on side to move relative rank 6. bool enpassant = false, legalEP = false; unsigned char col = '-', row; ss >> col; if (col != '-') { if (!(ss >> row)) return PositionSetError("Invalid FEN. 
Unexpected end of stream."); if ((col >= 'a' && col <= 'h') && (row == (sideToMove == WHITE ? '6' : '3'))) { st->epSquare = make_square(File(col - 'a'), Rank(row - '1')); Bitboard pawns = attacks_bb(st->epSquare, ~sideToMove) & pieces(sideToMove, PAWN); Bitboard target = (pieces(~sideToMove, PAWN) & (st->epSquare + pawn_push(~sideToMove))); Bitboard occ = pieces() ^ target ^ st->epSquare; // En passant square will be considered only if // a) side to move have a pawn threatening epSquare // b) there is an enemy pawn in front of epSquare // c) there is no piece on epSquare or behind epSquare enpassant = pawns && target && !(pieces() & (st->epSquare | (st->epSquare + pawn_push(sideToMove)))); // If no pawn can execute the en passant capture without leaving the king in check, don't record the epSquare while (pawns) legalEP |= !(attackers_to(square(sideToMove), occ ^ pop_lsb(pawns)) & pieces(~sideToMove) & ~target); } else return PositionSetError("Invalid FEN. Invalid en-passant square."); } if (!enpassant || !legalEP) st->epSquare = SQ_NONE; // 5-6. Halfmove clock and fullmove number ss >> std::skipws >> st->rule50 >> gamePly; // Normally values larger than 99 would be pointless but we do support ignoring 50 move rule for TB purposes. // Limit at 2**15 as it's used multiplicativly with position evaluation during search. if (st->rule50 < 0 || st->rule50 > 32767) return PositionSetError("Unsupported position. Rule50 counter out of range."); if (gamePly < 0 || gamePly > 100000) return PositionSetError("Unsupported position. Game ply out of range."); // Convert from fullmove starting from 1 to gamePly starting from 0, // handle also common incorrect FEN with fullmove = 0. gamePly = std::max(2 * (gamePly - 1), 0) + (sideToMove == BLACK); chess960 = isChess960; set_state(); if (attackers_to_exist(square(~sideToMove), pieces(), sideToMove)) return PositionSetError("Unsupported position. 
King can be captured."); assert(pos_is_ok()); return std::nullopt; } // Helper function used to set castling // rights given the corresponding color and the rook starting square. void Position::set_castling_right(Color c, Square rfrom) { Square kfrom = square(c); CastlingRights cr = c & (kfrom < rfrom ? KING_SIDE : QUEEN_SIDE); st->castlingRights |= cr; castlingRightsMask[kfrom] |= cr; castlingRightsMask[rfrom] |= cr; castlingRookSquare[cr] = rfrom; Square kto = relative_square(c, cr & KING_SIDE ? SQ_G1 : SQ_C1); Square rto = relative_square(c, cr & KING_SIDE ? SQ_F1 : SQ_D1); castlingPath[cr] = (between_bb(rfrom, rto) | between_bb(kfrom, kto)) & ~(kfrom | rfrom); } // Sets king attacks to detect if a move gives check void Position::set_check_info() const { update_slider_blockers(WHITE); update_slider_blockers(BLACK); Square ksq = square(~sideToMove); st->checkSquares[PAWN] = attacks_bb(ksq, ~sideToMove); st->checkSquares[KNIGHT] = attacks_bb(ksq); st->checkSquares[BISHOP] = attacks_bb(ksq, pieces()); st->checkSquares[ROOK] = attacks_bb(ksq, pieces()); st->checkSquares[QUEEN] = st->checkSquares[BISHOP] | st->checkSquares[ROOK]; st->checkSquares[KING] = 0; } // Computes the hash keys of the position, and other // data that once computed is updated incrementally as moves are made. 
// The function is only used when a new position is set up void Position::set_state() const { st->key = 0; st->minorPieceKey = 0; st->nonPawnKey[WHITE] = st->nonPawnKey[BLACK] = 0; st->pawnKey = Zobrist::noPawns; st->nonPawnMaterial[WHITE] = st->nonPawnMaterial[BLACK] = VALUE_ZERO; st->checkersBB = attackers_to(square(sideToMove)) & pieces(~sideToMove); set_check_info(); for (Bitboard b = pieces(); b;) { Square s = pop_lsb(b); Piece pc = piece_on(s); st->key ^= Zobrist::psq[pc][s]; if (type_of(pc) == PAWN) st->pawnKey ^= Zobrist::psq[pc][s]; else { st->nonPawnKey[color_of(pc)] ^= Zobrist::psq[pc][s]; if (type_of(pc) != KING) { st->nonPawnMaterial[color_of(pc)] += PieceValue[pc]; if (type_of(pc) <= BISHOP) st->minorPieceKey ^= Zobrist::psq[pc][s]; } } } if (st->epSquare != SQ_NONE) st->key ^= Zobrist::enpassant[file_of(st->epSquare)]; if (sideToMove == BLACK) st->key ^= Zobrist::side; st->key ^= Zobrist::castling[st->castlingRights]; st->materialKey = compute_material_key(); } Key Position::compute_material_key() const { Key k = 0; for (Piece pc : Pieces) for (int cnt = 0; cnt < pieceCount[pc]; ++cnt) k ^= Zobrist::psq[pc][8 + cnt]; return k; } // Overload to initialize the position object with the given endgame code string // like "KBPKN". It's mainly a helper to get the material key out of an endgame code. std::optional Position::set(const string& code, Color c, StateInfo* si) { assert(code[0] == 'K'); string sides[] = {code.substr(code.find('K', 1)), // Weak code.substr(0, std::min(code.find('v'), code.find('K', 1)))}; // Strong assert(sides[0].length() > 0 && sides[0].length() < 8); assert(sides[1].length() > 0 && sides[1].length() < 8); std::transform(sides[c].begin(), sides[c].end(), sides[c].begin(), tolower); string fenStr = "8/" + sides[0] + char(8 - sides[0].length() + '0') + "/8/8/8/8/" + sides[1] + char(8 - sides[1].length() + '0') + "/8 w - - 0 10"; return set(fenStr, false, si); } // Returns a FEN representation of the position. 
// In case of Chess960 the Shredder-FEN notation is used.
// This is mainly a debugging function.
//
// The string is built field by field: piece placement (rank 8 down to rank 1),
// side to move, castling rights, en passant square, rule50 counter and
// fullmove number.
string Position::fen() const {

    int                emptyCnt;
    std::ostringstream ss;

    for (Rank r = RANK_8;; --r)
    {
        for (File f = FILE_A; f <= FILE_H; ++f)
        {
            // Count a run of empty squares; note that this inner loop advances f
            for (emptyCnt = 0; f <= FILE_H && empty(make_square(f, r)); ++f)
                ++emptyCnt;

            if (emptyCnt)
                ss << emptyCnt;

            if (f <= FILE_H)
                ss << PieceToChar[piece_on(make_square(f, r))];
        }

        // Explicit exit instead of a loop condition: the loop stops right
        // after RANK_1 has been written, before the decrement.
        if (r == RANK_1)
            break;
        ss << '/';
    }

    ss << (sideToMove == WHITE ? " w " : " b ");

    // In Chess960 mode the file letter of the castling rook is written
    // instead of K/Q/k/q.
    if (can_castle(WHITE_OO))
        ss << (chess960 ? char('A' + file_of(castling_rook_square(WHITE_OO))) : 'K');

    if (can_castle(WHITE_OOO))
        ss << (chess960 ? char('A' + file_of(castling_rook_square(WHITE_OOO))) : 'Q');

    if (can_castle(BLACK_OO))
        ss << (chess960 ? char('a' + file_of(castling_rook_square(BLACK_OO))) : 'k');

    if (can_castle(BLACK_OOO))
        ss << (chess960 ? char('a' + file_of(castling_rook_square(BLACK_OOO))) : 'q');

    if (!can_castle(ANY_CASTLING))
        ss << '-';

    // gamePly counts plies from 0; convert back to a fullmove number from 1
    ss << (ep_square() == SQ_NONE ? " - " : " " + UCIEngine::square(ep_square()) + " ")
       << st->rule50 << " " << 1 + (gamePly - (sideToMove == BLACK)) / 2;

    return ss.str();
}

// Calculates st->blockersForKing[c] and st->pinners[~c],
// which store respectively the pieces preventing king of color c from being in check
// and the slider pieces of color ~c pinning pieces of color c to the king.
// NOTE(review): template arguments seem to be stripped in this copy (square(c),
// attacks_bb(ksq)); ksq is presumably the king square of c - confirm.
void Position::update_slider_blockers(Color c) const {

    Square ksq = square(c);

    st->blockersForKing[c] = 0;
    st->pinners[~c]        = 0;

    // Snipers are sliders of color ~c that attack ksq when a piece and other
    // snipers are removed
    Bitboard snipers = ((attacks_bb(ksq) & pieces(QUEEN, ROOK))
                        | (attacks_bb(ksq) & pieces(QUEEN, BISHOP)))
                     & pieces(~c);
    Bitboard occupancy = pieces() ^ snipers;

    while (snipers)
    {
        Square   sniperSq = pop_lsb(snipers);
        Bitboard b        = between_bb(ksq, sniperSq) & occupancy;

        // Exactly one piece (of either color) stands between sniper and king
        if (b && !more_than_one(b))
        {
            st->blockersForKing[c] |= b;
            // The sniper counts as a pinner only when the blocker is of color c
            if (b & pieces(c))
                st->pinners[~c] |= sniperSq;
        }
    }
}


// Computes a bitboard of all pieces which attack a given square.
// Slider attacks use the occupied bitboard to indicate occupancy.
// The result contains attackers of both colors.
// NOTE(review): the attacks_bb template arguments are stripped in this copy;
// each term is presumably specialised on the piece type it is masked with.
Bitboard Position::attackers_to(Square s, Bitboard occupied) const {

    return (attacks_bb(s, occupied) & pieces(ROOK, QUEEN))
         | (attacks_bb(s, occupied) & pieces(BISHOP, QUEEN))
         | (attacks_bb(s, BLACK) & pieces(WHITE, PAWN))
         | (attacks_bb(s, WHITE) & pieces(BLACK, PAWN))
         | (attacks_bb(s) & pieces(KNIGHT)) | (attacks_bb(s) & pieces(KING));
}

// Returns true if at least one piece of color c attacks square s, with slider
// attacks computed on the given occupancy. Unlike attackers_to() it
// short-circuits on the first non-empty term instead of building the full set.
bool Position::attackers_to_exist(Square s, Bitboard occupied, Color c) const {

    return (attacks_bb(s, occupied) & pieces(c, ROOK, QUEEN))
        || (attacks_bb(s, occupied) & pieces(c, BISHOP, QUEEN))
        || (attacks_bb(s, ~c) & pieces(c, PAWN)) || (attacks_bb(s) & pieces(c, KNIGHT))
        || (attacks_bb(s) & pieces(c, KING));
}

// Tests whether a pseudo-legal move is legal
bool Position::legal(Move m) const {

    assert(m.is_ok());

    Color  us   = sideToMove;
    Square from = m.from_sq();
    Square to   = m.to_sq();

    assert(color_of(moved_piece(m)) == us);
    assert(piece_on(square(us)) == make_piece(us, KING));

    // En passant captures are a tricky special case. Because they are rather
    // uncommon, we do it simply by testing whether the king is attacked after
    // the move is made.
    if (m.type_of() == EN_PASSANT)
    {
        Square   ksq      = square(us);
        Square   capsq    = to - pawn_push(us);
        // Occupancy after the capture: mover and captured pawn removed,
        // destination square filled.
        Bitboard occupied = (pieces() ^ from ^ capsq) | to;

        assert(to == ep_square());
        assert(moved_piece(m) == make_piece(us, PAWN));
        assert(piece_on(capsq) == make_piece(~us, PAWN));
        assert(piece_on(to) == NO_PIECE);

        return !(attacks_bb(ksq, occupied) & pieces(~us, QUEEN, ROOK))
            && !(attacks_bb(ksq, occupied) & pieces(~us, QUEEN, BISHOP));
    }

    // Castling moves generation does not check if the castling path is clear of
    // enemy attacks, it is delayed at a later time: now!
    if (m.type_of() == CASTLING)
    {
        // After castling, the rook and king final positions are the same in
        // Chess960 as they would be in standard chess.
        to             = relative_square(us, to > from ? SQ_G1 : SQ_C1);
        Direction step = to > from ? WEST : EAST;

        // Walk from the king's destination back towards (but excluding) its
        // starting square; none of these squares may be attacked.
        for (Square s = to; s != from; s += step)
            if (attackers_to_exist(s, pieces(), ~us))
                return false;

        // In case of Chess960, verify if the Rook blocks some checks.
        // For instance an enemy queen in SQ_A1 when castling rook is in SQ_B1.
        return !chess960 || !(blockers_for_king(us) & m.to_sq());
    }

    // If the moving piece is a king, check whether the destination square is
    // attacked by the opponent. The king is removed from the occupancy so that
    // sliders "see through" its current square.
    if (type_of(piece_on(from)) == KING)
        return !(attackers_to_exist(to, pieces() ^ from, ~us));

    // A non-king move is legal if and only if it is not pinned or it
    // is moving along the ray towards or away from the king.
    return !(blockers_for_king(us) & from) || line_bb(from, to) & pieces(us, KING);
}


// Takes a random move and tests whether the move is
// pseudo-legal. It is used to validate moves from TT that can be corrupted
// due to SMP concurrent access or hash position key aliasing.
bool Position::pseudo_legal(const Move m) const {

    Color  us   = sideToMove;
    Square from = m.from_sq();
    Square to   = m.to_sq();
    Piece  pc   = moved_piece(m);

    // Use a slower but simpler function for uncommon cases
    // yet we skip the legality check of MoveList().
    // NOTE(review): both branches read the same because the MoveList template
    // arguments are stripped in this copy; presumably they select different
    // generators depending on checkers() - confirm.
    if (m.type_of() != NORMAL)
        return checkers() ? MoveList(*this).contains(m) : MoveList(*this).contains(m);

    // Is not a promotion, so the promotion piece must be empty
    assert(m.promotion_type() - KNIGHT == NO_PIECE_TYPE);

    // If the 'from' square is not occupied by a piece belonging to the side to
    // move, the move is obviously not legal.
    if (pc == NO_PIECE || color_of(pc) != us)
        return false;

    // The destination square cannot be occupied by a friendly piece
    if (pieces(us) & to)
        return false;

    // Handle the special case of a pawn move
    if (type_of(pc) == PAWN)
    {
        // We have already handled promotion moves, so destination cannot be on the 8th/1st rank
        if ((Rank8BB | Rank1BB) & to)
            return false;

        // Check if it's a valid capture, single push, or double push
        const bool isCapture    = bool(attacks_bb(from, us) & pieces(~us) & to);
        const bool isSinglePush = (from + pawn_push(us) == to) && empty(to);
        const bool isDoublePush = (from + 2 * pawn_push(us) == to)
                               && (relative_rank(us, from) == RANK_2) && empty(to)
                               && empty(to - pawn_push(us));

        if (!(isCapture || isSinglePush || isDoublePush))
            return false;
    }
    else if (!(attacks_bb(type_of(pc), from, pieces()) & to))
        return false;

    // Evasions generator already takes care to avoid some kind of illegal moves
    // and legal() relies on this. We therefore have to take care that the same
    // kind of moves are filtered out here.
    if (checkers())
    {
        if (type_of(pc) != KING)
        {
            // Double check? In this case, a king move is required
            if (more_than_one(checkers()))
                return false;

            // Our move must be a blocking interposition or a capture of the checking piece
            if (!(between_bb(square(us), lsb(checkers())) & to))
                return false;
        }
        // In case of king moves under check we have to remove the king so as to catch
        // invalid moves like b1a1 when opposite queen is on c1.
        else if (attackers_to_exist(to, pieces() ^ from, ~us))
            return false;
    }

    return true;
}


// Tests whether a pseudo-legal move gives a check
bool Position::gives_check(Move m) const {

    assert(m.is_ok());
    assert(color_of(moved_piece(m)) == sideToMove);

    Square from = m.from_sq();
    Square to   = m.to_sq();

    // Is there a direct check?
    if (check_squares(type_of(piece_on(from))) & to)
        return true;

    // Is there a discovered check? A blocker uncovers the check unless it
    // stays on the line through the enemy king; castling is always treated as
    // uncovering it.
    if (blockers_for_king(~sideToMove) & from)
        return !(line_bb(from, to) & pieces(~sideToMove, KING)) || m.type_of() == CASTLING;

    switch (m.type_of())
    {
    case NORMAL :
        return false;

    case PROMOTION :
        // Attacks of the promoted piece with the pawn removed from 'from'
        return attacks_bb(m.promotion_type(), to, pieces() ^ from) & pieces(~sideToMove, KING);

    // En passant capture with check? We have already handled the case of direct
    // checks and ordinary discovered check, so the only case we need to handle
    // is the unusual case of a discovered check through the captured pawn.
    case EN_PASSANT : {
        Square   capsq = make_square(file_of(to), rank_of(from));
        Bitboard b     = (pieces() ^ from ^ capsq) | to;

        return (attacks_bb(square(~sideToMove), b) & pieces(sideToMove, QUEEN, ROOK))
             | (attacks_bb(square(~sideToMove), b) & pieces(sideToMove, QUEEN, BISHOP));
    }
    default :  //CASTLING
    {
        // Castling is encoded as 'king captures the rook'
        Square rto = relative_square(sideToMove, to > from ? SQ_F1 : SQ_D1);

        return check_squares(ROOK) & rto;
    }
    }
}


// Makes a move, and saves all information necessary
// to a StateInfo object. The move is assumed to be legal. Pseudo-legal
// moves should be filtered out before this function is called.
// If a pointer to the TT table is passed, the entry for the new position
// will be prefetched, and likewise for shared history.
//
//   m          - move to play
//   newSt      - state object that becomes the new current state (must differ
//                from the current one)
//   givesCheck - whether m gives check; when false the checkers bitboard is
//                simply cleared instead of being computed
//   dp, dts    - out-parameters filled with the piece changes and threat
//                changes caused by the move
//   tt         - optional transposition table used for prefetching
//   history    - optional shared histories used for prefetching
void Position::do_move(Move                      m,
                       StateInfo&                newSt,
                       bool                      givesCheck,
                       DirtyPiece&               dp,
                       DirtyThreats&             dts,
                       const TranspositionTable* tt      = nullptr,
                       const SharedHistories*    history = nullptr) {

    assert(m.is_ok());
    assert(&newSt != st);

    // Working copy of the key with the side to move already flipped
    Key k = st->key ^ Zobrist::side;

    // Copy some fields of the old state to our new StateInfo object except the
    // ones which are going to be recalculated from scratch anyway and then switch
    // our state pointer to point to the new (ready to be updated) state.
    // Only the members laid out before 'key' are copied.
    std::memcpy(&newSt, st, offsetof(StateInfo, key));
    newSt.previous = st;
    st             = &newSt;

    // Increment ply counters. In particular, rule50 will be reset to zero later on
    // in case of a capture or a pawn move.
    ++gamePly;
    ++st->rule50;
    ++st->pliesFromNull;

    Color  us       = sideToMove;
    Color  them     = ~us;
    Square from     = m.from_sq();
    Square to       = m.to_sq();
    Piece  pc       = piece_on(from);
    Piece  captured = m.type_of() == EN_PASSANT ? make_piece(them, PAWN) : piece_on(to);

    dp.pc     = pc;
    dp.from   = from;
    dp.to     = to;
    dp.add_sq = SQ_NONE;

    dts.us            = us;
    dts.prevKsq       = square(us);
    dts.threatenedSqs = dts.threateningSqs = 0;

    assert(color_of(pc) == us);
    // For castling the "captured" piece is our own rook
    assert(captured == NO_PIECE || color_of(captured) == (m.type_of() != CASTLING ? them : us));
    assert(type_of(captured) != KING);

    if (m.type_of() == CASTLING)
    {
        assert(pc == make_piece(us, KING));
        assert(captured == make_piece(us, ROOK));

        // do_castling() moves both pieces and rewrites 'to' with the king's
        // real destination square.
        Square rfrom, rto;
        do_castling(us, from, to, rfrom, rto, &dts, &dp);

        // Here 'captured' still holds our rook: account for its relocation
        k ^= Zobrist::psq[captured][rfrom] ^ Zobrist::psq[captured][rto];
        st->nonPawnKey[us] ^= Zobrist::psq[captured][rfrom] ^ Zobrist::psq[captured][rto];
        captured = NO_PIECE;
    }
    else if (captured)
    {
        Square capsq = to;

        // If the captured piece is a pawn, update pawn hash key, otherwise
        // update non-pawn material.
        if (type_of(captured) == PAWN)
        {
            if (m.type_of() == EN_PASSANT)
            {
                capsq -= pawn_push(us);

                assert(pc == make_piece(us, PAWN));
                assert(to == st->epSquare);
                assert(relative_rank(us, to) == RANK_6);
                assert(piece_on(to) == NO_PIECE);
                assert(piece_on(capsq) == make_piece(them, PAWN));

                // Update board and piece lists in ep case, normal captures are updated later
                remove_piece(capsq, &dts);
            }

            st->pawnKey ^= Zobrist::psq[captured][capsq];
        }
        else
        {
            st->nonPawnMaterial[them] -= PieceValue[captured];
            st->nonPawnKey[them] ^= Zobrist::psq[captured][capsq];

            if (type_of(captured) <= BISHOP)
                st->minorPieceKey ^= Zobrist::psq[captured][capsq];
        }

        dp.remove_pc = captured;
        dp.remove_sq = capsq;

        k ^= Zobrist::psq[captured][capsq];
        // In the ep case the pawn was already removed above, so pieceCount is
        // already decremented; for other captures the removal happens later,
        // hence the extra -1.
        st->materialKey ^=
          Zobrist::psq[captured][8 + pieceCount[captured] - (m.type_of() != EN_PASSANT)];

        // Reset rule 50 counter
        st->rule50 = 0;
    }
    else
        dp.remove_sq = SQ_NONE;

    // Update hash key
    k ^= Zobrist::psq[pc][from] ^ Zobrist::psq[pc][to];

    // Reset en passant square
    if (st->epSquare != SQ_NONE)
    {
        k ^= Zobrist::enpassant[file_of(st->epSquare)];
        st->epSquare = SQ_NONE;
    }

    // Update castling rights: xor out the old rights, clear whatever the
    // 'from' and 'to' squares invalidate, xor in the new rights.
    k ^= Zobrist::castling[st->castlingRights];
    st->castlingRights &= ~(castlingRightsMask[from] | castlingRightsMask[to]);
    k ^= Zobrist::castling[st->castlingRights];

    // Move the piece. The tricky Chess960 castling is handled earlier
    if (m.type_of() != CASTLING)
    {
        if (captured && m.type_of() != EN_PASSANT)
        {
            // Ordinary capture: lift the mover and replace the piece on 'to'
            remove_piece(from, &dts);
            swap_piece(to, pc, &dts);
        }
        else
            move_piece(from, to, &dts);
    }

    // If the moving piece is a pawn do some special extra work
    if (type_of(pc) == PAWN)
    {
        // Check if the en passant square needs to be set. Accurate e.p. info is needed
        // for correct zobrist key generation and 3-fold checking.
        // (to ^ from) == 16 identifies a two-rank pawn advance.
        if ((int(to) ^ int(from)) == 16)
        {
            Square   epSquare = to - pawn_push(us);
            Bitboard pawns    = attacks_bb(epSquare, us) & pieces(them, PAWN);

            // If there are no pawns attacking the ep square, ep is not possible.
            if (pawns)
            {
                Square   ksq         = square(them);
                Bitboard notBlockers = ~st->previous->blockersForKing[them];
                bool     noDiscovery = (from & notBlockers) || file_of(from) == file_of(ksq);

                // If the pawn gives discovered check, ep is never legal. Else, if at least one
                // pawn was not a blocker for the enemy king or lies on the same line as the
                // enemy king and en passant square, a legal capture exists.
                if (noDiscovery && (pawns & (notBlockers | line_bb(epSquare, ksq))))
                {
                    st->epSquare = epSquare;
                    k ^= Zobrist::enpassant[file_of(epSquare)];
                }
            }
        }

        else if (m.type_of() == PROMOTION)
        {
            Piece     promotion     = make_piece(us, m.promotion_type());
            PieceType promotionType = type_of(promotion);

            assert(relative_rank(us, to) == RANK_8);
            assert(type_of(promotion) >= KNIGHT && type_of(promotion) <= QUEEN);

            swap_piece(to, promotion, &dts);

            dp.add_pc = promotion;
            dp.add_sq = to;
            dp.to     = SQ_NONE;

            // Update hash keys
            // Zobrist::psq[pc][to] is zero, so we don't need to clear it
            k ^= Zobrist::psq[promotion][to];
            // pieceCount already reflects the swap: one more promoted piece,
            // one pawn fewer.
            st->materialKey ^= Zobrist::psq[promotion][8 + pieceCount[promotion] - 1]
                             ^ Zobrist::psq[pc][8 + pieceCount[pc]];

            st->nonPawnKey[us] ^= Zobrist::psq[promotion][to];

            if (promotionType <= BISHOP)
                st->minorPieceKey ^= Zobrist::psq[promotion][to];

            // Update material
            st->nonPawnMaterial[us] += PieceValue[promotion];
        }

        // Update pawn hash key
        st->pawnKey ^= Zobrist::psq[pc][from] ^ Zobrist::psq[pc][to];

        // Reset rule 50 draw counter
        st->rule50 = 0;
    }

    else
    {
        st->nonPawnKey[us] ^= Zobrist::psq[pc][from] ^ Zobrist::psq[pc][to];

        if (type_of(pc) <= BISHOP)
            st->minorPieceKey ^= Zobrist::psq[pc][from] ^ Zobrist::psq[pc][to];
    }

    // Update the key with the final value
    st->key = k;
    if (tt)
        prefetch(tt->first_entry(key()));

    if (history)
    {
        prefetch(&history->pawn_entry(*this)[pc][to]);
        prefetch(&history->pawn_correction_entry(*this));
        prefetch(&history->minor_piece_correction_entry(*this));
        // NOTE(review): the two calls below look identical because template
        // arguments are stripped in this copy; presumably one per color.
        prefetch(&history->nonpawn_correction_entry(*this));
        prefetch(&history->nonpawn_correction_entry(*this));
    }

    // Set capture piece
    st->capturedPiece = captured;

    // Calculate checkers bitboard (if move gives check)
    st->checkersBB = givesCheck ? attackers_to(square(them)) & pieces(us) : 0;

    sideToMove = ~sideToMove;

    // Update king attacks used for fast check detection
    set_check_info();

    // Calculate the repetition info. It is the ply distance from the previous
    // occurrence of the same position, negative in the 3-fold case, or zero
    // if the position was not repeated.
    st->repetition = 0;
    int end        = std::min(st->rule50, st->pliesFromNull);
    if (end >= 4)
    {
        // Only states an even number of plies back (same side to move) are
        // compared, starting four plies back.
        StateInfo* stp = st->previous->previous;
        for (int i = 4; i <= end; i += 2)
        {
            stp = stp->previous->previous;
            if (stp->key == st->key)
            {
                st->repetition = stp->repetition ? -i : i;
                break;
            }
        }
    }

    dts.ksq = square(us);

    assert(pos_is_ok());

    assert(dp.pc != NO_PIECE);
    assert(!(bool(captured) || m.type_of() == CASTLING) ^ (dp.remove_sq != SQ_NONE));
    assert(dp.from != SQ_NONE);
    assert(!(dp.add_sq != SQ_NONE) ^ (m.type_of() == PROMOTION || m.type_of() == CASTLING));
}


// Unmakes a move. When it returns, the position should
// be restored to exactly the same state as before the move was made.
// Takes back move m, which must be the move that produced the current state.
// Board, side to move, state pointer and gamePly are restored; the keys need
// no work because they live in the StateInfo that is dropped.
void Position::undo_move(Move m) {

    assert(m.is_ok());

    sideToMove = ~sideToMove;

    Color  us   = sideToMove;
    Square from = m.from_sq();
    Square to   = m.to_sq();
    Piece  pc   = piece_on(to);

    assert(empty(from) || m.type_of() == CASTLING);
    assert(type_of(st->capturedPiece) != KING);

    if (m.type_of() == PROMOTION)
    {
        assert(relative_rank(us, to) == RANK_8);
        assert(type_of(pc) == m.promotion_type());
        assert(type_of(pc) >= KNIGHT && type_of(pc) <= QUEEN);

        // Turn the promoted piece back into a pawn before moving it back
        pc = make_piece(us, PAWN);
        swap_piece(to, pc);
    }

    if (m.type_of() == CASTLING)
    {
        Square rfrom, rto;
        do_castling(us, from, to, rfrom, rto);
    }
    else
    {
        move_piece(to, from);  // Put the piece back at the source square

        if (st->capturedPiece)
        {
            Square capsq = to;

            if (m.type_of() == EN_PASSANT)
            {
                capsq -= pawn_push(us);

                assert(type_of(pc) == PAWN);
                assert(to == st->previous->epSquare);
                assert(relative_rank(us, to) == RANK_6);
                assert(piece_on(capsq) == NO_PIECE);
                assert(st->capturedPiece == make_piece(~us, PAWN));
            }

            put_piece(st->capturedPiece, capsq);  // Restore the captured piece
        }
    }

    // Finally point our state pointer back to the previous state
    st = st->previous;
    --gamePly;

    assert(pos_is_ok());
}

// Appends one threat record {pc, threatened, s, threatenedSq, PutPiece} to
// dts->list. When PutPiece is set, the threatened and threatening squares are
// also accumulated into the corresponding dts bitboards.
// NOTE(review): the template parameter list is stripped in this copy;
// PutPiece is presumably a bool template parameter - confirm.
template
inline void add_dirty_threat(
  DirtyThreats* const dts, Piece pc, Piece threatened, Square s, Square threatenedSq) {
    if (PutPiece)
    {
        dts->threatenedSqs |= threatenedSq;
        dts->threateningSqs |= s;
    }

    dts->list.push_back({pc, threatened, s, threatenedSq, PutPiece});
}

#ifdef USE_AVX512ICL
// Given a DirtyThreat template and bit offsets to insert the piece type and square, write the threats
// present at the given bitboard.
// NOTE(review): SqShift and PcShift are not declared in this copy; presumably
// they are the (stripped) template parameters carrying those bit offsets.
template
void write_multiple_dirties(const Position& p,
                            Bitboard        mask,
                            DirtyThreat     dt_template,
                            DirtyThreats*   dts) {
    // The vector code below writes one 32-bit lane per DirtyThreat
    static_assert(sizeof(DirtyThreat) == 4);

    const __m512i board      = _mm512_loadu_si512(p.piece_array().data());
    // Byte i holds the value i, i.e. the identity list of square indices
    const __m512i AllSquares = _mm512_set_epi8(
      63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43, 42, 41,
      40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18,
      17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);

    const int dt_count = popcount(mask);
    assert(dt_count <= 16);

    const __m512i template_v = _mm512_set1_epi32(dt_template.raw());
    auto*         write      = dts->list.make_space(dt_count);

    // Extract the list of squares and upconvert to 32 bits. There are never more than 16
    // incoming threats so this is sufficient.
    __m512i threat_squares = _mm512_maskz_compress_epi8(mask, AllSquares);
    threat_squares         = _mm512_cvtepi8_epi32(_mm512_castsi512_si128(threat_squares));

    // Look up the piece standing on each extracted square (low byte of each lane)
    __m512i threat_pieces =
      _mm512_maskz_permutexvar_epi8(0x1111111111111111ULL, threat_squares, board);

    // Shift the piece and square into place
    threat_squares = _mm512_slli_epi32(threat_squares, SqShift);
    threat_pieces  = _mm512_slli_epi32(threat_pieces, PcShift);

    const __m512i dirties =
      _mm512_ternarylogic_epi32(template_v, threat_squares, threat_pieces, 254 /* A | B | C */);
    // NOTE(review): a full 512-bit store is issued regardless of dt_count;
    // presumably make_space() guarantees enough room - confirm.
    _mm512_storeu_si512(write, dirties);
}
#endif

// Records into dts the threats involving piece pc on square s: the pieces pc
// attacks, the pieces attacking s, and (when ComputeRay is set) the slider
// threats that pass through s to the piece behind it.
// NOTE(review): the template parameter list is stripped in this copy;
// PutPiece and ComputeRay are presumably bool template parameters, and the
// add_dirty_threat / write_multiple_dirties calls presumably carry explicit
// template arguments upstream - confirm.
template
void Position::update_piece_threats(Piece                     pc,
                                    Square                    s,
                                    DirtyThreats* const       dts,
                                    [[maybe_unused]] Bitboard noRaysContaining) const {
    const Bitboard occupied     = pieces();
    const Bitboard rookQueens   = pieces(ROOK, QUEEN);
    const Bitboard bishopQueens = pieces(BISHOP, QUEEN);
    const Bitboard rAttacks     = attacks_bb(s, occupied);
    const Bitboard bAttacks     = attacks_bb(s, occupied);
    const Bitboard kings        = pieces(KING);
    Bitboard       occupiedNoK  = occupied ^ kings;

    // Sliders with a direct line to s
    Bitboard sliders = (rookQueens & rAttacks) | (bishopQueens & bAttacks);

    // Consumes 'sliders': for each one, records the threat on the first
    // non-king piece found on its ray beyond s (skipped when that ray contains
    // all of noRaysContaining) and, if requested, the direct threat on s.
    auto process_sliders = [&](bool addDirectAttacks) {
        while (sliders)
        {
            Square sliderSq = pop_lsb(sliders);
            Piece  slider   = piece_on(sliderSq);

            const Bitboard ray        = RayPassBB[sliderSq][s];
            const Bitboard discovered = ray & (rAttacks | bAttacks) & occupiedNoK;

            assert(!more_than_one(discovered));
            if (discovered && (RayPassBB[sliderSq][s] & noRaysContaining) != noRaysContaining)
            {
                const Square threatenedSq = lsb(discovered);
                const Piece  threatenedPc = piece_on(threatenedSq);
                add_dirty_threat(dts, slider, threatenedPc, sliderSq, threatenedSq);
            }

            if (addDirectAttacks)
                add_dirty_threat(dts, slider, pc, sliderSq, s);
        }
    };

    // For a king only the ray threats are handled
    if (type_of(pc) == KING)
    {
        if constexpr (ComputeRay)
            process_sliders(false);
        return;
    }

    const Bitboard knights    = pieces(KNIGHT);
    const Bitboard whitePawns = pieces(WHITE, PAWN);
    const Bitboard blackPawns = pieces(BLACK, PAWN);

    // Non-king pieces attacked by pc from s
    Bitboard threatened = attacks_bb(pc, s, occupied) & occupiedNoK;
    // Non-slider pieces attacking s
    Bitboard incoming_threats =
      (PseudoAttacks[KNIGHT][s] & knights) | (attacks_bb(s, WHITE) & blackPawns)
      | (attacks_bb(s, BLACK) & whitePawns) | (PseudoAttacks[KING][s] & kings);

#ifdef USE_AVX512ICL
    if constexpr (PutPiece)
    {
        dts->threatenedSqs |= threatened;
        // A bit may only be set if that square actually produces a threat, so we
        // must guard setting the square accordingly
        dts->threateningSqs |= Bitboard(bool(threatened)) << s;
    }

    DirtyThreat dt_template{pc, NO_PIECE, s, Square(0), PutPiece};
    write_multiple_dirties(*this, threatened, dt_template, dts);

    Bitboard all_attackers = sliders | incoming_threats;

    if constexpr (PutPiece)
    {
        dts->threatenedSqs |= Bitboard(bool(all_attackers)) << s;  // same as above
        dts->threateningSqs |= all_attackers;
    }

    dt_template = {NO_PIECE, pc, Square(0), s, PutPiece};
    write_multiple_dirties(*this, all_attackers, dt_template, dts);
#else
    while (threatened)
    {
        Square threatenedSq = pop_lsb(threatened);
        Piece  threatenedPc = piece_on(threatenedSq);

        assert(threatenedSq != s);
        assert(threatenedPc);

        add_dirty_threat(dts, pc, threatenedPc, s, threatenedSq);
    }
#endif

    if constexpr (ComputeRay)
    {
#ifndef USE_AVX512ICL
        process_sliders(true);
#else  // for ICL, direct threats were processed earlier (all_attackers)
        process_sliders(false);
#endif
    }
    else
    {
        // Without ray processing the sliders are handled like any other attacker
        incoming_threats |= sliders;
    }

#ifndef USE_AVX512ICL
    while (incoming_threats)
    {
        Square srcSq = pop_lsb(incoming_threats);
        Piece  srcPc = piece_on(srcSq);

        assert(srcSq != s);
        assert(srcPc != NO_PIECE);

        add_dirty_threat(dts, srcPc, pc, srcSq, s);
    }
#endif
}

// Helper used to do/undo a castling move. This is a bit
// tricky in Chess960 where from/to squares can overlap.
//
// On entry 'to' is the castling rook's square; on return 'to' holds the king's
// destination, rfrom the rook's origin and rto the rook's destination.
// When Do is set, dp must be non-null and receives the rook relocation.
// NOTE(review): the template parameter list is stripped in this copy; Do is
// presumably a bool template parameter - confirm.
template
void Position::do_castling(Color               us,
                           Square              from,
                           Square&             to,
                           Square&             rfrom,
                           Square&             rto,
                           DirtyThreats* const dts,
                           DirtyPiece* const   dp) {

    bool kingSide = to > from;
    rfrom         = to;  // Castling is encoded as "king captures friendly rook"
    rto           = relative_square(us, kingSide ? SQ_F1 : SQ_D1);
    to            = relative_square(us, kingSide ? SQ_G1 : SQ_C1);

    assert(!Do || dp);

    if (Do)
    {
        // The rook move is reported as "remove from rfrom, add on rto"
        dp->to        = to;
        dp->remove_pc = dp->add_pc = make_piece(us, ROOK);
        dp->remove_sq              = rfrom;
        dp->add_sq                 = rto;
    }

    // Remove both pieces first since squares could overlap in Chess960
    remove_piece(Do ? from : to, dts);
    remove_piece(Do ? rfrom : rto, dts);
    put_piece(make_piece(us, KING), Do ? to : from, dts);
    put_piece(make_piece(us, ROOK), Do ? rto : rfrom, dts);
}


// Used to do a "null move": it flips
// the side to move without executing any move on the board.
// newSt becomes the new current state; it must differ from the current one,
// and the side to move must not be in check.
void Position::do_null_move(StateInfo& newSt) {

    assert(!checkers());
    assert(&newSt != st);

    // Unlike do_move(), the whole state is copied, since nothing on the board
    // changes.
    std::memcpy(&newSt, st, sizeof(StateInfo));

    newSt.previous = st;
    st             = &newSt;

    // Drop the en passant square from both the state and the key
    if (st->epSquare != SQ_NONE)
    {
        st->key ^= Zobrist::enpassant[file_of(st->epSquare)];
        st->epSquare = SQ_NONE;
    }

    st->key ^= Zobrist::side;

    st->pliesFromNull = 0;

    sideToMove = ~sideToMove;

    set_check_info();

    st->repetition = 0;

    assert(pos_is_ok());
}


// Must be used to undo a "null move"
void Position::undo_null_move() {

    assert(!checkers());

    st         = st->previous;
    sideToMove = ~sideToMove;
}


// Tests if the SEE (Static Exchange Evaluation)
// value of move is greater or equal to the given threshold. We'll use an
// algorithm similar to alpha-beta pruning with a null window.
// NOTE(review): the attacks_bb calls below lack their template arguments in
// this copy; presumably each is specialised on the slider type it is masked
// with - confirm.
bool Position::see_ge(Move m, int threshold) const {

    assert(m.is_ok());

    // Only deal with normal moves, assume others pass a simple SEE
    if (m.type_of() != NORMAL)
        return VALUE_ZERO >= threshold;

    Square from = m.from_sq(), to = m.to_sq();

    assert(piece_on(from) != NO_PIECE);

    // Even winning the piece on 'to' for free does not reach the threshold
    int swap = PieceValue[piece_on(to)] - threshold;
    if (swap < 0)
        return false;

    // Even losing the moving piece afterwards keeps us at or above the threshold
    swap = PieceValue[piece_on(from)] - swap;
    if (swap <= 0)
        return true;

    assert(color_of(piece_on(from)) == sideToMove);
    Bitboard occupied  = pieces() ^ from ^ to;  // xoring to is important for pinned piece logic
    Color    stm       = sideToMove;
    Bitboard attackers = attackers_to(to, occupied);
    Bitboard stmAttackers, bb;
    // res is flipped each time a side recaptures
    int      res = 1;

    while (true)
    {
        stm = ~stm;
        attackers &= occupied;

        // If stm has no more attackers then give up: stm loses
        if (!(stmAttackers = attackers & pieces(stm)))
            break;

        // Don't allow pinned pieces to attack as long as there are
        // pinners on their original square.
        if (pinners(~stm) & occupied)
        {
            stmAttackers &= ~blockers_for_king(stm);

            if (!stmAttackers)
                break;
        }

        res ^= 1;

        // Locate and remove the next least valuable attacker, and add to
        // the bitboard 'attackers' any X-ray attackers behind it.
        if ((bb = stmAttackers & pieces(PAWN)))
        {
            if ((swap = PawnValue - swap) < res)
                break;
            occupied ^= least_significant_square_bb(bb);

            attackers |= attacks_bb(to, occupied) & pieces(BISHOP, QUEEN);
        }

        else if ((bb = stmAttackers & pieces(KNIGHT)))
        {
            // No x-ray update here, unlike the other branches
            if ((swap = KnightValue - swap) < res)
                break;
            occupied ^= least_significant_square_bb(bb);
        }

        else if ((bb = stmAttackers & pieces(BISHOP)))
        {
            if ((swap = BishopValue - swap) < res)
                break;
            occupied ^= least_significant_square_bb(bb);

            attackers |= attacks_bb(to, occupied) & pieces(BISHOP, QUEEN);
        }

        else if ((bb = stmAttackers & pieces(ROOK)))
        {
            if ((swap = RookValue - swap) < res)
                break;
            occupied ^= least_significant_square_bb(bb);

            attackers |= attacks_bb(to, occupied) & pieces(ROOK, QUEEN);
        }

        else if ((bb = stmAttackers & pieces(QUEEN)))
        {
            swap = QueenValue - swap;
            //  implies that the previous recapture was done by a higher rated piece than a Queen (King is excluded)
            assert(swap >= res);
            occupied ^= least_significant_square_bb(bb);

            attackers |= (attacks_bb(to, occupied) & pieces(BISHOP, QUEEN))
                       | (attacks_bb(to, occupied) & pieces(ROOK, QUEEN));
        }

        else  // KING
              // If we "capture" with the king but the opponent still has attackers,
              // reverse the result.
            return (attackers & ~pieces(stm)) ? res ^ 1 : res;
    }

    return bool(res);
}

// Tests whether the position is drawn by 50-move rule
// or by repetition. It does not detect stalemates.
// The rule50 draw is not claimed when the side to move is in check and has no
// move in the generated list.
// NOTE(review): the MoveList template argument is stripped in this copy;
// presumably the legal-move generator is meant - confirm.
bool Position::is_draw(int ply) const {

    if (st->rule50 > 99 && (!checkers() || MoveList(*this).size()))
        return true;

    return is_repetition(ply);
}

// Return a draw score if a position repeats once earlier but strictly
// after the root, or repeats twice before or at the root.
// st->repetition is negative in the 3-fold case (see do_move()), so that case
// satisfies the comparison for any positive ply.
bool Position::is_repetition(int ply) const { return st->repetition && st->repetition < ply; }

// Tests whether there has been at least one repetition
// of positions since the last capture or pawn move.
// Walks back through the state list, bounded by min(rule50, pliesFromNull),
// and reports whether any visited state carries a repetition mark.
bool Position::has_repeated() const {

    StateInfo* stc = st;
    int        end = std::min(st->rule50, st->pliesFromNull);
    while (end-- >= 4)
    {
        if (stc->repetition)
            return true;

        stc = stc->previous;
    }
    return false;
}


// Tests if the position has a move which draws by repetition.
// This function accurately matches the outcome of is_draw() over all legal moves.
bool Position::upcoming_repetition(int ply) const {

    int j;

    int end = std::min(st->rule50, st->pliesFromNull);

    if (end < 3)
        return false;

    Key        originalKey = st->key;
    StateInfo* stp         = st->previous;
    // Running xor of key differences; it is zero when the moves in between
    // cancel out.
    Key        other       = originalKey ^ stp->key ^ Zobrist::side;

    for (int i = 3; i <= end; i += 2)
    {
        stp = stp->previous;
        other ^= stp->key ^ stp->previous->key ^ Zobrist::side;
        stp = stp->previous;

        if (other != 0)
            continue;

        // Look the key difference up in the cuckoo table (two hash slots) to
        // find a single move that produces it.
        Key moveKey = originalKey ^ stp->key;
        if ((j = H1(moveKey), cuckoo[j] == moveKey) || (j = H2(moveKey), cuckoo[j] == moveKey))
        {
            Move   move = cuckooMove[j];
            Square s1   = move.from_sq();
            Square s2   = move.to_sq();

            // The path between the two squares and the square s2 must be empty
            if (!((between_bb(s1, s2) ^ s2) & pieces()))
            {
                if (ply > i)
                    return true;

                // For nodes before or at the root, check that the move is a
                // repetition rather than a move to the current position.
                if (stp->repetition)
                    return true;
            }
        }
    }
    return false;
}


// Flips position with the white and black sides reversed. This
// is only useful for debugging e.g. for finding evaluation symmetry bugs.
// It works on the FEN text: ranks are reversed, letter case is swapped and the
// en passant rank is mirrored, then the position is set up again from the
// resulting string.
void Position::flip() {

    string            f, token;
    std::stringstream ss(fen());

    for (Rank r = RANK_8;; --r)  // Piece placement
    {
        std::getline(ss, token, r > RANK_1 ? '/' : ' ');
        // Prepend, so that the rank order ends up reversed
        f.insert(0, token + (f.empty() ? " " : "/"));

        if (r == RANK_1)
            break;
    }

    ss >> token;                        // Active color
    f += (token == "w" ? "B " : "W ");  // Will be lowercased later

    ss >> token;  // Castling availability
    f += token + " ";

    // Swap the case of everything collected so far (pieces, color, castling)
    std::transform(f.begin(), f.end(), f.begin(),
                   [](char c) { return char(islower(c) ? toupper(c) : tolower(c)); });

    ss >> token;  // En passant square
    f += (token == "-" ? token : token.replace(1, 1, token[1] == '3' ? "6" : "3"));

    std::getline(ss, token);  // Half and full moves
    f += token;

    set(f, is_chess960(), st);

    assert(pos_is_ok());
}


// Returns true when the stored material key equals the one recomputed from
// the piece counts.
bool Position::material_key_is_ok() const { return compute_material_key() == st->materialKey; }


// Performs some consistency checks for the position object
// and raise an assert if something wrong is detected.
// This is meant to be helpful when debugging.
// Always returns true; failures are reported through assert only.
bool Position::pos_is_ok() const {

    constexpr bool Fast = true;  // Quick (default) or full check?

    if ((sideToMove != WHITE && sideToMove != BLACK) || piece_on(square(WHITE)) != W_KING
        || piece_on(square(BLACK)) != B_KING
        || (ep_square() != SQ_NONE && relative_rank(sideToMove, ep_square()) != RANK_6))
        assert(0 && "pos_is_ok: Default");

    // With Fast set, everything below is skipped
    if (Fast)
        return true;

    if (pieceCount[W_KING] != 1 || pieceCount[B_KING] != 1
        || attackers_to_exist(square(~sideToMove), pieces(), sideToMove))
        assert(0 && "pos_is_ok: Kings");

    if ((pieces(PAWN) & (Rank1BB | Rank8BB)) || pieceCount[W_PAWN] > 8 || pieceCount[B_PAWN] > 8)
        assert(0 && "pos_is_ok: Pawns");

    if (ep_square() != SQ_NONE)
    {
        Square ksq = square(sideToMove);

        Bitboard captured = (ep_square() + pawn_push(~sideToMove)) & pieces(~sideToMove, PAWN);
        Bitboard pawns    = attacks_bb(ep_square(), ~sideToMove) & pieces(sideToMove, PAWN);
        Bitboard potentialCheckers = pieces(~sideToMove) ^ captured;

        // The ep square is accepted only if there is a pawn to capture, a pawn
        // able to capture it, and at least one of the capturing pawns (lsb or
        // msb of 'pawns') can do so without leaving its king attacked.
        if (!captured || !pawns
            || ((attackers_to(ksq, pieces() ^ captured ^ ep_square() ^ lsb(pawns))
                 & potentialCheckers)
                && (attackers_to(ksq, pieces() ^ captured ^ ep_square() ^ msb(pawns))
                    & potentialCheckers)))
            assert(0 && "pos_is_ok: En passant square");
    }

    if ((pieces(WHITE) & pieces(BLACK)) || (pieces(WHITE) | pieces(BLACK)) != pieces()
        || popcount(pieces(WHITE)) > 16 || popcount(pieces(BLACK)) > 16)
        assert(0 && "pos_is_ok: Bitboards");

    // Bitboards of different piece types must be disjoint
    for (PieceType p1 = PAWN; p1 <= KING; ++p1)
        for (PieceType p2 = PAWN; p2 <= KING; ++p2)
            if (p1 != p2 && (pieces(p1) & pieces(p2)))
                assert(0 && "pos_is_ok: Bitboards");


    // Piece counts must agree with both the bitboards and the board array
    for (Piece pc : Pieces)
        if (pieceCount[pc] != popcount(pieces(color_of(pc), type_of(pc)))
            || pieceCount[pc] != std::count(board.begin(), board.end(), pc))
            assert(0 && "pos_is_ok: Pieces");

    for (Color c : {WHITE, BLACK})
        for (CastlingRights cr : {c & KING_SIDE, c & QUEEN_SIDE})
        {
            if (!can_castle(cr))
                continue;

            if (piece_on(castlingRookSquare[cr]) != make_piece(c, ROOK)
                || castlingRightsMask[castlingRookSquare[cr]] != cr
                || (castlingRightsMask[square(c)] & cr) != cr)
                assert(0 && "pos_is_ok: Castling");
        }

    assert(material_key_is_ok() && "pos_is_ok: materialKey");

    return true;
}

}  // namespace Stockfish

================================================
FILE: src/position.h
================================================
/*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
  Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.

  Stockfish is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program.  If not, see .
*/

#ifndef POSITION_H_INCLUDED
#define POSITION_H_INCLUDED

#include
#include
#include
#include
#include
#include
#include
#include
#include

#include "bitboard.h"
#include "types.h"

namespace Stockfish {

class TranspositionTable;
struct SharedHistories;

// StateInfo struct stores information needed to restore a Position object to
// its previous state when we retract a move. Whenever a move is made on the
// board (by calling Position::do_move), a StateInfo object must be passed.
struct StateInfo {

    // Copied when making a move
    Key    materialKey;
    Key    pawnKey;
    Key    minorPieceKey;
    Key    nonPawnKey[COLOR_NB];
    Value  nonPawnMaterial[COLOR_NB];
    int    castlingRights;
    int    rule50;
    int    pliesFromNull;
    Square epSquare;

    // Not copied when making a move (will be recomputed anyhow)
    Key        key;
    Bitboard   checkersBB;
    StateInfo* previous;
    Bitboard   blockersForKing[COLOR_NB];
    Bitboard   pinners[COLOR_NB];
    Bitboard   checkSquares[PIECE_TYPE_NB];
    Piece      capturedPiece;
    int        repetition;
};


// A list to keep track of the position states along the setup moves (from the
// start position to the position just before the search starts). Needed by
// 'draw by repetition' detection. Use a std::deque because pointers to
// elements are not invalidated upon list resizing.
// NOTE(review): the template argument list of std::unique_ptr is missing here
// (presumably stripped by extraction) - restore from upstream.
using StateListPtr = std::unique_ptr>;

// This error should be used whenever a position is suspected to be unsupported
// by the engine. In particular positions that may cause hard errors like segmentation fault.
struct PositionSetError: std::runtime_error {
    using std::runtime_error::runtime_error;
};

// Position class stores information regarding the board representation as
// pieces, side to move, hash keys, castling info, etc. Important methods are
// do_move() and undo_move(), used by the search to update node info when
// traversing the search tree.
//
// The class is default-constructible but neither copyable nor assignable.
//
// NOTE(review): throughout this class and the inline definitions that follow,
// angle-bracketed text (template parameter lists after `template`, arguments
// of std::optional / std::array, explicit template arguments at call sites)
// appears to have been stripped by extraction. The tokens are kept exactly as
// found; restore them from upstream before compiling.
class Position {
   public:
    static void init();

    Position()                           = default;
    Position(const Position&)            = delete;
    Position& operator=(const Position&) = delete;

    // FEN string input/output
    std::optional set(const std::string& fenStr, bool isChess960, StateInfo* si);
    std::optional set(const std::string& code, Color c, StateInfo* si);
    std::string   fen() const;

    // Position representation
    Bitboard pieces() const;  // All pieces
    template
    Bitboard pieces(PieceTypes... pts) const;
    Bitboard pieces(Color c) const;
    template
    Bitboard pieces(Color c, PieceTypes... pts) const;
    Piece    piece_on(Square s) const;
    const std::array& piece_array() const;
    Square   ep_square() const;
    bool     empty(Square s) const;
    template
    int count(Color c) const;
    template
    int count() const;
    template
    Square square(Color c) const;

    // Castling
    bool   can_castle(CastlingRights cr) const;
    bool   castling_impeded(CastlingRights cr) const;
    Square castling_rook_square(CastlingRights cr) const;

    // Checking
    Bitboard checkers() const;
    Bitboard blockers_for_king(Color c) const;
    Bitboard check_squares(PieceType pt) const;
    Bitboard pinners(Color c) const;

    // Attacks to/from a given square
    Bitboard attackers_to(Square s) const;
    Bitboard attackers_to(Square s, Bitboard occupied) const;
    bool     attackers_to_exist(Square s, Bitboard occupied, Color c) const;
    void     update_slider_blockers(Color c) const;
    template
    Bitboard attacks_by(Color c) const;

    // Properties of moves
    bool  legal(Move m) const;
    bool  pseudo_legal(const Move m) const;
    bool  capture(Move m) const;
    bool  capture_stage(Move m) const;
    bool  gives_check(Move m) const;
    Piece moved_piece(Move m) const;
    Piece captured_piece() const;

    // Doing and undoing moves
    void do_move(Move m, StateInfo& newSt, const TranspositionTable* tt);
    void do_move(Move                      m,
                 StateInfo&                newSt,
                 bool                      givesCheck,
                 DirtyPiece&               dp,
                 DirtyThreats&             dts,
                 const TranspositionTable* tt,
                 const SharedHistories*    worker);
    void undo_move(Move m);
    void do_null_move(StateInfo& newSt);
    void undo_null_move();

    // Static Exchange Evaluation
    bool see_ge(Move m, int threshold = 0) const;

    // Accessing hash keys
    Key key() const;
    Key material_key() const;
    Key pawn_key() const;
    Key minor_piece_key() const;
    Key non_pawn_key(Color c) const;

    // Other properties of the position
    Color side_to_move() const;
    int   game_ply() const;
    bool  is_chess960() const;
    bool  is_draw(int ply) const;
    bool  is_repetition(int ply) const;
    bool  upcoming_repetition(int ply) const;
    bool  has_repeated() const;
    int   rule50_count() const;
    Value non_pawn_material(Color c) const;
    Value non_pawn_material() const;

    // Position consistency check, for debugging
    bool pos_is_ok() const;
    bool material_key_is_ok() const;
    void flip();

    StateInfo* state() const;

    // Low-level board mutators. When a DirtyThreats pointer is supplied the
    // inline definitions also call update_piece_threats() for the affected piece.
    void put_piece(Piece pc, Square s, DirtyThreats* const dts = nullptr);
    void remove_piece(Square s, DirtyThreats* const dts = nullptr);
    void swap_piece(Square s, Piece pc, DirtyThreats* const dts = nullptr);

   private:
    // Initialization helpers (used while setting up a position)
    void set_castling_right(Color c, Square rfrom);
    Key  compute_material_key() const;
    void set_state() const;
    void set_check_info() const;

    // Other helpers
    template
    void update_piece_threats(Piece               pc,
                              Square              s,
                              DirtyThreats* const dts,
                              Bitboard            noRaysContaining = -1ULL) const;
    void move_piece(Square from, Square to, DirtyThreats* const dts = nullptr);
    template
    void do_castling(Color               us,
                     Square              from,
                     Square&             to,
                     Square&             rfrom,
                     Square&             rto,
                     DirtyThreats* const dts = nullptr,
                     DirtyPiece* const   dp  = nullptr);
    Key  adjust_key50(Key k) const;

    // Data members
    std::array board;      // indexed by Square (see piece_on)
    std::array byTypeBB;   // indexed by PieceType, ALL_PIECES holds the union
    std::array byColorBB;  // indexed by Color

    int          pieceCount[PIECE_NB];
    int          castlingRightsMask[SQUARE_NB];
    Square       castlingRookSquare[CASTLING_RIGHT_NB];
    Bitboard     castlingPath[CASTLING_RIGHT_NB];
    StateInfo*   st;
    int          gamePly;
    Color        sideToMove;
    bool         chess960;
    DirtyPiece   scratch_dp;   // scratch buffers used by the 3-argument do_move()
    DirtyThreats scratch_dts;
};

std::ostream& operator<<(std::ostream& os, const Position& pos);

// Side whose turn it is
inline Color Position::side_to_move() const { return sideToMove; }

// Piece standing on square s (asserts that s is a valid square)
inline Piece Position::piece_on(Square s) const {
    assert(is_ok(s));
    return board[s];
}

// Read-only access to the whole board array
inline const std::array& Position::piece_array() const { return board; }

inline bool Position::empty(Square s) const { return piece_on(s) == NO_PIECE; }

// Piece on the origin square of m
inline Piece Position::moved_piece(Move m) const { return piece_on(m.from_sq()); }

// Bitboard of all pieces of both colours
inline Bitboard Position::pieces() const { return byTypeBB[ALL_PIECES]; }

// Union of the bitboards of all the given piece types (fold expression over |)
template
inline Bitboard Position::pieces(PieceTypes... pts) const {
    return (byTypeBB[pts] | ...);
}

// Bitboard of all pieces of colour c
inline Bitboard Position::pieces(Color c) const { return byColorBB[c]; }

// Pieces of colour c restricted to the given piece types
template
inline Bitboard Position::pieces(Color c, PieceTypes... pts) const {
    return pieces(c) & pieces(pts...);
}

// Number of pieces of type Pt that colour c has, read from pieceCount
template
inline int Position::count(Color c) const {
    return pieceCount[make_piece(c, Pt)];
}

// Number of pieces of type Pt for both colours together
template
inline int Position::count() const {
    return count(WHITE) + count(BLACK);
}

// Square of the unique piece of type Pt of colour c; asserts there is exactly one
template
inline Square Position::square(Color c) const {
    assert(count(c) == 1);
    return lsb(pieces(c, Pt));
}

inline Square Position::ep_square() const { return st->epSquare; }

// True when any bit of cr is set in the current state's castling rights
inline bool Position::can_castle(CastlingRights cr) const { return st->castlingRights & cr; }

// True when some piece occupies the castling path stored for cr.
// cr must be exactly one of the four single castling rights.
inline bool Position::castling_impeded(CastlingRights cr) const {
    assert(cr == WHITE_OO || cr == WHITE_OOO || cr == BLACK_OO || cr == BLACK_OOO);
    return pieces() & castlingPath[cr];
}

// Rook square recorded for the single castling right cr
inline Square Position::castling_rook_square(CastlingRights cr) const {
    assert(cr == WHITE_OO || cr == WHITE_OOO || cr == BLACK_OO || cr == BLACK_OOO);
    return castlingRookSquare[cr];
}

// Attackers of s computed with the current occupancy
inline Bitboard Position::attackers_to(Square s) const { return attackers_to(s, pieces()); }

// Union of all squares attacked by the pieces of type Pt of colour c.
// Pawns are handled with a single pawn_attacks_bb() call on the pawn
// bitboard; for other types the attacks of each piece are OR-ed together
// using the current occupancy.
template
inline Bitboard Position::attacks_by(Color c) const {

    if constexpr (Pt == PAWN)
        return c == WHITE ? pawn_attacks_bb(pieces(WHITE, PAWN))
                          : pawn_attacks_bb(pieces(BLACK, PAWN));
    else
    {
        Bitboard threats   = 0;
        Bitboard attackers = pieces(c, Pt);
        while (attackers)
            threats |= attacks_bb(pop_lsb(attackers), pieces());
        return threats;
    }
}

inline Bitboard Position::checkers() const { return st->checkersBB; }

inline Bitboard Position::blockers_for_king(Color c) const { return st->blockersForKing[c]; }

inline Bitboard Position::pinners(Color c) const { return st->pinners[c]; }

inline Bitboard Position::check_squares(PieceType pt) const { return st->checkSquares[pt]; }

// Position key as seen by callers: the stored key passed through adjust_key50()
inline Key Position::key() const { return adjust_key50(st->key); }

// Leaves k untouched while rule50 < 14; from 14 on, k is XOR-ed with
// make_key((rule50 - 14) / 8), i.e. the adjustment changes once every
// eight steps of the rule50 counter.
inline Key Position::adjust_key50(Key k) const {
    return st->rule50 < 14 ? k : k ^ make_key((st->rule50 - 14) / 8);
}

inline Key Position::pawn_key() const { return st->pawnKey; }

inline Key Position::material_key() const { return st->materialKey; }

inline Key Position::minor_piece_key() const { return st->minorPieceKey; }

inline Key Position::non_pawn_key(Color c) const { return st->nonPawnKey[c]; }

inline Value Position::non_pawn_material(Color c) const { return st->nonPawnMaterial[c]; }

// Non-pawn material of both sides added together
inline Value Position::non_pawn_material() const {
    return non_pawn_material(WHITE) + non_pawn_material(BLACK);
}

inline int Position::game_ply() const { return gamePly; }

inline int Position::rule50_count() const { return st->rule50; }

inline bool Position::is_chess960() const { return chess960; }

// A move is a capture when its destination square is occupied and it is not a
// castling move, or when it is an en passant move.
inline bool Position::capture(Move m) const {
    assert(m.is_ok());
    return (!empty(m.to_sq()) && m.type_of() != CASTLING) || m.type_of() == EN_PASSANT;
}

// Returns true if a move is generated from the capture stage, having also
// queen promotions covered, i.e. consistency with the capture stage move
// generation is needed to avoid the generation of duplicate moves.
inline bool Position::capture_stage(Move m) const {
    assert(m.is_ok());
    return capture(m) || m.promotion_type() == QUEEN;
}

// Piece recorded as captured in the current StateInfo
inline Piece Position::captured_piece() const { return st->capturedPiece; }

// Places pc on square s: updates the board array, the per-type, all-pieces
// and per-colour bitboards and the piece counters. If dts is non-null the
// threat bookkeeping is updated AFTER the piece is on the board.
// NOTE(review): update_piece_threats is declared as a template; its explicit
// template arguments appear to have been stripped here - confirm upstream.
inline void Position::put_piece(Piece pc, Square s, DirtyThreats* const dts) {
    board[s] = pc;
    // Chained assignment: first set s in the type bitboard, then OR that
    // type bitboard into the all-pieces bitboard.
    byTypeBB[ALL_PIECES] |= byTypeBB[type_of(pc)] |= s;
    byColorBB[color_of(pc)] |= s;
    pieceCount[pc]++;
    pieceCount[make_piece(color_of(pc), ALL_PIECES)]++;  // counter indexed by (colour, ALL_PIECES)

    if (dts)
        update_piece_threats(pc, s, dts);
}

// Removes the piece standing on s. If dts is non-null the threat bookkeeping
// is updated BEFORE the bitboards change, i.e. while the piece is still there.
inline void Position::remove_piece(Square s, DirtyThreats* const dts) {
    Piece pc = board[s];

    if (dts)
        update_piece_threats(pc, s, dts);

    byTypeBB[ALL_PIECES] ^= s;
    byTypeBB[type_of(pc)] ^= s;
    byColorBB[color_of(pc)] ^= s;
    board[s] = NO_PIECE;
    pieceCount[pc]--;
    pieceCount[make_piece(color_of(pc), ALL_PIECES)]--;
}

// Moves the piece on `from` to `to` by toggling both squares in the
// bitboards; piece counters are not touched. With a non-null dts, threats
// are updated for the origin before and for the destination after the move,
// passing the from|to mask as the noRaysContaining argument.
inline void Position::move_piece(Square from, Square to, DirtyThreats* const dts) {
    Piece    pc     = board[from];
    Bitboard fromTo = from | to;

    if (dts)
        update_piece_threats(pc, from, dts, fromTo);

    byTypeBB[ALL_PIECES] ^= fromTo;
    byTypeBB[type_of(pc)] ^= fromTo;
    byColorBB[color_of(pc)] ^= fromTo;
    board[from] = NO_PIECE;
    board[to]   = pc;

    if (dts)
        update_piece_threats(pc, to, dts, fromTo);
}

// Replaces the piece on s by pc. remove_piece()/put_piece() are called
// without dts (so they skip their own threat update) and the threat updates
// are issued here instead: for the old piece after it has been removed and
// for the new piece after it has been placed.
inline void Position::swap_piece(Square s, Piece pc, DirtyThreats* const dts) {
    Piece old = board[s];

    remove_piece(s);

    if (dts)
        update_piece_threats(old, s, dts);

    put_piece(pc, s);

    if (dts)
        update_piece_threats(pc, s, dts);
}

// Convenience overload: re-initialises the scratch DirtyThreats in place
// (placement new), computes gives_check(m) itself and forwards to the full
// do_move() using the scratch buffers and a null last argument.
inline void Position::do_move(Move m, StateInfo& newSt, const TranspositionTable* tt = nullptr) {
    new (&scratch_dts) DirtyThreats;
    do_move(m, newSt, gives_check(m), scratch_dp, scratch_dts, tt, nullptr);
}

// Pointer to the current StateInfo
inline StateInfo* Position::state() const { return st; }

}  // namespace Stockfish

#endif  // #ifndef POSITION_H_INCLUDED

================================================
FILE: src/score.cpp
================================================
/*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
  Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.

  Stockfish is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

#include "score.h"

// NOTE(review): the header names of the following standard includes were
// lost when this dump was produced - restore them from upstream.
#include
#include
#include

#include "uci.h"

namespace Stockfish {

// Classifies the internal search value v into one of the three alternatives
// held by Score:
//   - not decisive            -> InternalUnits, converted with UCIEngine::to_cp(v, pos)
//   - decisive, |v| <= VALUE_TB -> Tablebase, distance = VALUE_TB - |v|
//   - otherwise               -> Mate, distance = VALUE_MATE - |v|
// For losing scores (v <= 0) the stored distance is negated; Tablebase
// additionally records the win/loss flag. v must lie strictly inside
// (-VALUE_INFINITE, VALUE_INFINITE).
Score::Score(Value v, const Position& pos) {
    assert(-VALUE_INFINITE < v && v < VALUE_INFINITE);

    if (!is_decisive(v))
    {
        score = InternalUnits{UCIEngine::to_cp(v, pos)};
    }
    else if (std::abs(v) <= VALUE_TB)
    {
        auto distance = VALUE_TB - std::abs(v);
        score         = (v > 0) ? Tablebase{distance, true} : Tablebase{-distance, false};
    }
    else
    {
        auto distance = VALUE_MATE - std::abs(v);
        score         = (v > 0) ? Mate{distance} : Mate{-distance};
    }
}

}

================================================
FILE: src/score.h
================================================
/*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
  Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.

  Stockfish is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

#ifndef SCORE_H_INCLUDED
#define SCORE_H_INCLUDED

// NOTE(review): the header names of the following standard includes were
// lost when this dump was produced - restore them from upstream.
#include
#include

#include "types.h"

namespace Stockfish {

class Position;

// Score holds a search result as exactly one of three alternatives
// (Mate, Tablebase or InternalUnits) stored in a std::variant.
//
// NOTE(review): the template parameter lists of is()/get()/visit(), the
// explicit template arguments of std::holds_alternative / std::get /
// std::forward and the alternative list of std::variant appear to have been
// stripped by extraction - restore from upstream before compiling.
class Score {
   public:
    // Distance to mate in plies (the constructor stores it negated for losing scores)
    struct Mate {
        int plies;
    };

    // Distance in plies relative to VALUE_TB plus an explicit win/loss flag
    struct Tablebase {
        int  plies;
        bool win;
    };

    // Ordinary evaluation, as produced by UCIEngine::to_cp() in the constructor
    struct InternalUnits {
        int value;
    };

    Score() = default;
    Score(Value v, const Position& pos);

    // True when the stored alternative is T
    template
    bool is() const {
        return std::holds_alternative(score);
    }

    // Returns the stored alternative as T (std::get semantics)
    template
    T get() const {
        return std::get(score);
    }

    // Applies the visitor f to the stored alternative via std::visit
    template
    decltype(auto) visit(F&& f) const {
        return std::visit(std::forward(f), score);
    }

   private:
    std::variant score;
};

}

#endif  // #ifndef SCORE_H_INCLUDED

================================================
FILE: src/search.cpp
================================================
/*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
  Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.

  Stockfish is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

#include "search.h"

// NOTE(review): the header names of the following standard includes were
// lost when this dump was produced - restore them from upstream.
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

#include "bitboard.h"
#include "evaluate.h"
#include "history.h"
#include "misc.h"
#include "movegen.h"
#include "movepick.h"
#include "nnue/network.h"
#include "nnue/nnue_accumulator.h"
#include "position.h"
#include "syzygy/tbprobe.h"
#include "thread.h"
#include "timeman.h"
#include "tt.h"
#include "types.h"
#include "uci.h"
#include "ucioption.h"

namespace Stockfish {

namespace TB = Tablebases;

void syzygy_extend_pv(const OptionsMap&            options,
                      const Search::LimitsType&    limits,
                      Stockfish::Position&         pos,
                      Stockfish::Search::RootMove& rootMove,
                      Value&                       v);

using namespace Search;

namespace {

constexpr int SEARCHEDLIST_CAPACITY = 32;
// NOTE(review): the template arguments of ValueList appear to have been
// stripped by extraction - restore from upstream.
using SearchedList                  = ValueList;

// (*Scalers):
// The values with Scaler asterisks have proven non-linear scaling.
// They are optimized to time controls of 180 + 1.8 and longer,
// so changing them or adding conditions that are similar requires
// tests at these types of time controls.

// (*Scaler) All tuned parameters at time controls shorter than
// optimized for require verifications at longer time controls

// Returns the weighted sum of the correction-history entries that apply to
// pos for the side to move: pawn, minor piece, non-pawn (white and black
// fields) and the continuation correction entries of (ss - 2) and (ss - 4),
// the latter two indexed by the piece now standing on the destination square
// of the previous move. When the previous move is not ok the continuation
// term is the constant 8.
// NOTE(review): both non-pawn reads call nonpawn_correction_entry(pos)
// identically; a per-colour template argument was presumably stripped.
int correction_value(const Worker& w, const Position& pos, const Stack* const ss) {
    const Color us     = pos.side_to_move();
    const auto  m      = (ss - 1)->currentMove;
    const auto& shared = w.sharedHistory;
    const int   pcv    = shared.pawn_correction_entry(pos).at(us).pawn;
    const int   micv   = shared.minor_piece_correction_entry(pos).at(us).minor;
    const int   wnpcv  = shared.nonpawn_correction_entry(pos).at(us).nonPawnWhite;
    const int   bnpcv  = shared.nonpawn_correction_entry(pos).at(us).nonPawnBlack;
    const int   cntcv =
      m.is_ok() ? (*(ss - 2)->continuationCorrectionHistory)[pos.piece_on(m.to_sq())][m.to_sq()]
                    + (*(ss - 4)->continuationCorrectionHistory)[pos.piece_on(m.to_sq())][m.to_sq()]
                  : 8;

    return 12153 * pcv + 8620 * micv + 12355 * (wnpcv + bnpcv) + 7982 * cntcv;
}

// Add correctionHistory value to raw staticEval and guarantee evaluation
// does not hit the tablebase range.
// cv is scaled down by 131072 before being added; the result is clamped to
// the open interval (VALUE_TB_LOSS_IN_MAX_PLY, VALUE_TB_WIN_IN_MAX_PLY).
Value to_corrected_static_eval(const Value v, const int cv) {
    return std::clamp(v + cv / 131072, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1);
}

// Feeds `bonus` into the same correction-history entries that
// correction_value() reads, each with its own weight (x/128 scaling).
// The entries are updated through their operator<<; its exact update rule is
// defined elsewhere (presumably in history.h).
void update_correction_history(const Position& pos,
                               Stack* const    ss,
                               Search::Worker& workerThread,
                               const int       bonus) {
    const Move  m  = (ss - 1)->currentMove;
    const Color us = pos.side_to_move();

    constexpr int nonPawnWeight = 187;
    auto&         shared        = workerThread.sharedHistory;

    shared.pawn_correction_entry(pos).at(us).pawn << bonus;
    shared.minor_piece_correction_entry(pos).at(us).minor << bonus * 153 / 128;
    shared.nonpawn_correction_entry(pos).at(us).nonPawnWhite << bonus * nonPawnWeight / 128;
    shared.nonpawn_correction_entry(pos).at(us).nonPawnBlack << bonus * nonPawnWeight / 128;

    // Branchless: use mask to zero bonus when move is not ok
    // (to_sq_unchecked() is used so the destination can be read regardless;
    // the continuation entries are then written with a zero bonus.)
    const int    mask   = int(m.is_ok());
    const Square to     = m.to_sq_unchecked();
    const Piece  pc     = pos.piece_on(to);
    const int    bonus2 = (bonus * 126 / 128) * mask;
    const int    bonus4 = (bonus * 63 / 128) * mask;
    (*(ss - 2)->continuationCorrectionHistory)[pc][to] << bonus2;
    (*(ss - 4)->continuationCorrectionHistory)[pc][to] << bonus4;
}

// Add a small random component to draw evaluations to avoid 3-fold blindness
// (nodes & 0x2 is either 0 or 2, so the result is VALUE_DRAW - 1 or VALUE_DRAW + 1.)
Value value_draw(size_t nodes) { return VALUE_DRAW - 1 + Value(nodes & 0x2); }

// Forward declarations of helpers defined later in this file
Value value_to_tt(Value v, int ply);
Value value_from_tt(Value v, int ply, int r50c);
void  update_pv(Move* pv, Move move, const Move* childPv);
void  update_continuation_histories(Stack* ss, Piece pc, Square to, int bonus);
void  update_quiet_histories(
   const Position& pos, Stack* ss, Search::Worker& workerThread, Move move, int bonus);
void update_all_stats(const
Position& pos,
                      Stack*          ss,
                      Search::Worker& workerThread,
                      Move            bestMove,
                      Square          prevSq,
                      SearchedList&   quietsSearched,
                      SearchedList&   capturesSearched,
                      Depth           depth,
                      Move            ttMove);

// Returns true when `move` continues a back-and-forth pattern of the same
// piece: it starts where the move at (ss - 2) ended, and that move in turn
// started where the move at (ss - 4) ended. The test is only made for
// non-capture-stage moves, with rule50 >= 11, more than 6 plies since the
// last null move and a search ply of at least 18.
bool is_shuffling(Move move, Stack* const ss, const Position& pos) {
    if (pos.capture_stage(move) || pos.rule50_count() < 11)
        return false;
    if (pos.state()->pliesFromNull <= 6 || ss->ply < 18)
        return false;
    return move.from_sq() == (ss - 2)->currentMove.to_sq()
        && (ss - 2)->currentMove.from_sq() == (ss - 4)->currentMove.to_sq();
}

}  // namespace

// Builds a worker bound to the shared engine state. The shared history and
// the refresh table are selected with the NUMA access token; the search
// manager is moved in. clear() is called at the end of construction.
// NOTE(review): the template argument of std::unique_ptr for `sm` appears to
// have been stripped by extraction - restore from upstream.
Search::Worker::Worker(SharedState&              sharedState,
                       std::unique_ptr           sm,
                       size_t                    threadId,
                       size_t                    numaThreadId,
                       size_t                    numaTotalThreads,
                       NumaReplicatedAccessToken token) :
    // Unpack the SharedState struct into member variables
    sharedHistory(sharedState.sharedHistories.at(token.get_numa_index())),
    threadIdx(threadId),
    numaThreadIdx(numaThreadId),
    numaTotal(numaTotalThreads),
    numaAccessToken(token),
    manager(std::move(sm)),
    options(sharedState.options),
    threads(sharedState.threads),
    tt(sharedState.tt),
    networks(sharedState.networks),
    refreshTable(networks[token]) {
    clear();
}

void Search::Worker::ensure_network_replicated() {
    // Access once to force lazy initialization.
    // We do this because we want to avoid initialization during search.
    (void) (networks[numaAccessToken]);
}

// Entry point of a search for this worker. Non-main workers just run
// iterative_deepening(). The main worker additionally initialises time
// management and the transposition table, starts the other threads, waits
// for the search to end, selects the best thread and reports bestmove/ponder
// through the manager's update callbacks.
void Search::Worker::start_searching() {

    accumulatorStack.reset();
    lastIterationPV.clear();

    // Non-main threads go directly to iterative_deepening()
    if (!is_mainthread())
    {
        iterative_deepening();
        return;
    }

    main_manager()->tm.init(limits, rootPos.side_to_move(), rootPos.game_ply(), options,
                            main_manager()->originalTimeAdjust);
    tt.new_search();

    if (rootMoves.empty())
    {
        // No root move available: insert a none-move placeholder and report
        // -VALUE_MATE when in check, VALUE_DRAW otherwise.
        rootMoves.emplace_back(Move::none());
        main_manager()->updates.onUpdateNoMoves(
          {0, {rootPos.checkers() ? -VALUE_MATE : VALUE_DRAW, rootPos}});
    }
    else
    {
        threads.start_searching();  // start non-main threads
        iterative_deepening();      // main thread start searching
    }

    // When we reach the maximum depth, we can arrive here without a raise of
    // threads.stop. However, if we are pondering or in an infinite search,
    // the UCI protocol states that we shouldn't print the best move before the
    // GUI sends a "stop" or "ponderhit" command. We therefore simply wait here
    // until the GUI sends one of those commands.
    while (!threads.stop && (main_manager()->ponder || limits.infinite))
    {}  // Busy wait for a stop or a ponder reset

    // Stop the threads if not already stopped (also raise the stop if
    // "ponderhit" just reset threads.ponder)
    threads.stop = true;

    // Wait until all threads have finished
    threads.wait_for_search_finished();

    // When playing in 'nodes as time' mode, subtract the searched nodes from
    // the available ones before exiting.
    if (limits.npmsec)
        main_manager()->tm.advance_nodes_time(threads.nodes_searched()
                                              - limits.inc[rootPos.side_to_move()]);

    Worker* bestThread = this;
    Skill   skill =
      Skill(options["Skill Level"], options["UCI_LimitStrength"] ? int(options["UCI_Elo"]) : 0);

    // Best-thread selection is only used for single-PV searches without a
    // depth limit or skill handicap, and only when a real root move exists.
    if (int(options["MultiPV"]) == 1 && !limits.depth && !skill.enabled()
        && rootMoves[0].pv[0] != Move::none())
        bestThread = threads.get_best_thread()->worker.get();

    main_manager()->bestPreviousScore        = bestThread->rootMoves[0].score;
    main_manager()->bestPreviousAverageScore = bestThread->rootMoves[0].averageScore;

    // Send again PV info if we have a new best thread
    if (bestThread != this)
        main_manager()->pv(*bestThread, threads, tt, bestThread->completedDepth);

    std::string ponder;

    // Ponder move: second PV move if present, otherwise try to extract one
    // from the transposition table.
    if (bestThread->rootMoves[0].pv.size() > 1
        || bestThread->rootMoves[0].extract_ponder_from_tt(tt, rootPos))
        ponder = UCIEngine::move(bestThread->rootMoves[0].pv[1], rootPos.is_chess960());

    auto bestmove = UCIEngine::move(bestThread->rootMoves[0].pv[0], rootPos.is_chess960());
    main_manager()->updates.onBestmove(bestmove, ponder);
}

// Main iterative deepening loop. It calls search()
// repeatedly with increasing depth until the allocated thinking time has been
// consumed, the user stops the search, or the maximum search depth is reached.
void Search::Worker::iterative_deepening() {

    SearchManager* mainThread = (is_mainthread() ? main_manager() : nullptr);

    Move pv[MAX_PLY + 1];

    Depth lastBestMoveDepth = 0;
    Value lastBestScore     = -VALUE_INFINITE;
    std::vector lastBestPV;

    Value  alpha, beta;
    Value  bestValue     = -VALUE_INFINITE;
    Color  us            = rootPos.side_to_move();
    double timeReduction = 1, totBestMoveChanges = 0;
    int    delta, iterIdx                        = 0;

    // Allocate stack with extra size to allow access from (ss - 7) to (ss + 2):
    // (ss - 7) is needed for update_continuation_histories(ss - 1) which accesses (ss - 6),
    // (ss + 2) is needed for initialization of cutOffCnt.
Stack stack[MAX_PLY + 10] = {}; Stack* ss = stack + 7; for (int i = 7; i > 0; --i) { (ss - i)->continuationHistory = &continuationHistory[0][0][NO_PIECE][0]; // Use as a sentinel (ss - i)->continuationCorrectionHistory = &continuationCorrectionHistory[NO_PIECE][0]; (ss - i)->staticEval = VALUE_NONE; } for (int i = 0; i <= MAX_PLY + 2; ++i) (ss + i)->ply = i; ss->pv = pv; if (mainThread) { if (mainThread->bestPreviousScore == VALUE_INFINITE) mainThread->iterValue.fill(VALUE_ZERO); else mainThread->iterValue.fill(mainThread->bestPreviousScore); } size_t multiPV = size_t(options["MultiPV"]); Skill skill(options["Skill Level"], options["UCI_LimitStrength"] ? int(options["UCI_Elo"]) : 0); // When playing with strength handicap enable MultiPV search that we will // use behind-the-scenes to retrieve a set of possible moves. if (skill.enabled()) multiPV = std::max(multiPV, size_t(4)); multiPV = std::min(multiPV, rootMoves.size()); int searchAgainCounter = 0; lowPlyHistory.fill(98); for (Color c : {WHITE, BLACK}) for (int i = 0; i < UINT_16_HISTORY_SIZE; i++) mainHistory[c][i] = mainHistory[c][i] * 820 / 1024; // Iterative deepening loop until requested to stop or the target depth is reached while (++rootDepth < MAX_PLY && !threads.stop && !(limits.depth && mainThread && rootDepth > limits.depth)) { // Age out PV variability metric if (mainThread) totBestMoveChanges /= 2; // Save the last iteration's scores before the first PV line is searched and // all the move scores except the (new) PV are set to -VALUE_INFINITE. for (RootMove& rm : rootMoves) rm.previousScore = rm.score; size_t pvFirst = 0; pvLast = 0; if (!threads.increaseDepth) searchAgainCounter++; // MultiPV loop. 
We perform a full root search for each PV line for (pvIdx = 0; pvIdx < multiPV; ++pvIdx) { if (pvIdx == pvLast) { pvFirst = pvLast; for (pvLast++; pvLast < rootMoves.size(); pvLast++) if (rootMoves[pvLast].tbRank != rootMoves[pvFirst].tbRank) break; } // Reset UCI info selDepth for each depth and each PV line selDepth = 0; // Reset aspiration window starting size delta = 5 + threadIdx % 8 + std::abs(rootMoves[pvIdx].meanSquaredScore) / 10208; Value avg = rootMoves[pvIdx].averageScore; alpha = std::max(avg - delta, -VALUE_INFINITE); beta = std::min(avg + delta, VALUE_INFINITE); // Adjust optimism based on root move's averageScore optimism[us] = 144 * avg / (std::abs(avg) + 91); optimism[~us] = -optimism[us]; // Start with a small aspiration window and, in the case of a fail // high/low, re-search with a bigger window until we don't fail // high/low anymore. int failedHighCnt = 0; while (true) { // Adjust the effective depth searched, but ensure at least one // effective increment for every four searchAgain steps (see issue #2717). Depth adjustedDepth = std::max(1, rootDepth - failedHighCnt - 3 * (searchAgainCounter + 1) / 4); rootDelta = beta - alpha; bestValue = search(rootPos, ss, alpha, beta, adjustedDepth, false); // Bring the best move to the front. It is critical that sorting // is done with a stable algorithm because all the values but the // first and eventually the new best one is set to -VALUE_INFINITE // and we want to keep the same order for all the moves except the // new PV that goes to the front. Note that in the case of MultiPV // search the already searched PV lines are preserved. std::stable_sort(rootMoves.begin() + pvIdx, rootMoves.begin() + pvLast); // If search has been stopped, we break immediately. Sorting is // safe because RootMoves is still valid, although it refers to // the previous iteration. if (threads.stop) break; // When failing high/low give some update before a re-search. 
To avoid // excessive output that could hang GUIs like Fritz 19, only start // at nodes > 10M (rather than depth N, which can be reached quickly) if (mainThread && multiPV == 1 && (bestValue <= alpha || bestValue >= beta) && nodes > 10000000) main_manager()->pv(*this, threads, tt, rootDepth); // In case of failing low/high increase aspiration window and re-search, // otherwise exit the loop. if (bestValue <= alpha) { beta = alpha; alpha = std::max(bestValue - delta, -VALUE_INFINITE); failedHighCnt = 0; if (mainThread) mainThread->stopOnPonderhit = false; } else if (bestValue >= beta) { alpha = std::max(beta - delta, alpha); beta = std::min(bestValue + delta, VALUE_INFINITE); ++failedHighCnt; } else break; delta += delta / 3; assert(alpha >= -VALUE_INFINITE && beta <= VALUE_INFINITE); } // Sort the PV lines searched so far and update the GUI std::stable_sort(rootMoves.begin() + pvFirst, rootMoves.begin() + pvIdx + 1); if (mainThread && (threads.stop || pvIdx + 1 == multiPV || nodes > 10000000) // A thread that aborted search can have a mated-in/TB-loss score and // PV that cannot be trusted, i.e. it can be delayed or refuted if we // would have had time to fully search other root-moves. Thus here we // suppress any exact mated-in/TB loss output and, if we do, below pick // the score/PV from the previously completed iteration with the most // recent bestmove change. && !(threads.stop && is_loss(rootMoves[0].uciScore) && rootMoves[0].score == rootMoves[0].uciScore)) main_manager()->pv(*this, threads, tt, rootDepth); if (threads.stop) break; } if (!threads.stop) { completedDepth = rootDepth; lastIterationPV = rootMoves[0].pv; } // We make sure not to pick an unproven mated-in score, // in case this thread prematurely stopped search (aborted-search). if (completedDepth != rootDepth && rootMoves[0].score != -VALUE_INFINITE && is_loss(rootMoves[0].score)) { // Bring the last best move to the front for best thread selection. 
// For an aborted d1 search we label the loss score as inexact. if (!lastBestPV.empty()) { Utility::move_to_front(rootMoves, [&lastBestPV = std::as_const(lastBestPV)](const auto& rm) { return rm == lastBestPV[0]; }); rootMoves[0].pv = lastBestPV; rootMoves[0].score = rootMoves[0].uciScore = lastBestScore; } else { if (!rootMoves[0].scoreLowerbound) rootMoves[0].scoreUpperbound = true; if (mainThread) main_manager()->pv(*this, threads, tt, rootDepth); } } else if (lastBestPV.empty() || rootMoves[0].pv[0] != lastBestPV[0]) { lastBestPV = rootMoves[0].pv; lastBestScore = rootMoves[0].score; lastBestMoveDepth = rootDepth; } // Have we found a "mate in x" after a completed iteration? if (limits.mate && !threads.stop && ((rootMoves[0].score >= VALUE_MATE_IN_MAX_PLY && VALUE_MATE - rootMoves[0].score <= 2 * limits.mate) || (rootMoves[0].score <= VALUE_MATED_IN_MAX_PLY && VALUE_MATE + rootMoves[0].score <= 2 * limits.mate))) threads.stop = true; if (!mainThread) continue; // If the skill level is enabled and time is up, pick a sub-optimal best move if (skill.enabled() && skill.time_to_pick(rootDepth)) skill.pick_best(rootMoves, multiPV); // Use part of the gained time from a previous stable move for the current move for (auto&& th : threads) { totBestMoveChanges += th->worker->bestMoveChanges; th->worker->bestMoveChanges = 0; } // Do we have time for the next iteration? Can we stop searching now? 
if (limits.use_time_management() && !threads.stop && !mainThread->stopOnPonderhit) { uint64_t nodesEffort = rootMoves[0].effort * 100000 / std::max(size_t(1), size_t(nodes)); double fallingEval = (12.44 + 2.318 * (mainThread->bestPreviousAverageScore - bestValue) + 0.95 * (mainThread->iterValue[iterIdx] - bestValue)) / 100.0; fallingEval = std::clamp(fallingEval, 0.581, 1.655); // If the bestMove is stable over several iterations, reduce time accordingly double k = 0.476; double center = lastBestMoveDepth + 11.565; timeReduction = 0.64 + 0.93 / (0.953 + std::exp(-k * (completedDepth - center))); double reduction = (1.5 + mainThread->previousTimeReduction) / (2.255 * timeReduction); double bestMoveInstability = 1.088 + 2.315 * totBestMoveChanges / threads.size(); double highBestMoveEffort = nodesEffort > 86000 ? 0.74 : 0.96; double totalTime = mainThread->tm.optimum() * fallingEval * reduction * bestMoveInstability * highBestMoveEffort; // Cap used time in case of a single legal move for a better viewer experience if (rootMoves.size() == 1) totalTime = std::min(504.4, totalTime); auto elapsedTime = elapsed(); // Stop the search if we have exceeded the totalTime or maximum if (elapsedTime > std::min(totalTime, double(mainThread->tm.maximum()))) { // If we are allowed to ponder do not stop the search now but // keep pondering until the GUI sends "ponderhit" or "stop". if (mainThread->ponder) mainThread->stopOnPonderhit = true; else threads.stop = true; } else threads.increaseDepth = mainThread->ponder || elapsedTime <= totalTime * 0.50; } mainThread->iterValue[iterIdx] = bestValue; iterIdx = (iterIdx + 1) & 3; } if (!mainThread) return; mainThread->previousTimeReduction = timeReduction; // If the skill level is enabled, swap the best PV line with the sub-optimal one if (skill.enabled()) std::swap(rootMoves[0], *std::find(rootMoves.begin(), rootMoves.end(), skill.best ? 
skill.best : skill.pick_best(rootMoves, multiPV))); } void Search::Worker::do_move(Position& pos, const Move move, StateInfo& st, Stack* const ss) { do_move(pos, move, st, pos.gives_check(move), ss); } void Search::Worker::do_move( Position& pos, const Move move, StateInfo& st, const bool givesCheck, Stack* const ss) { bool capture = pos.capture_stage(move); // Preferable over fetch_add to avoid locking instructions nodes.store(nodes.load(std::memory_order_relaxed) + 1, std::memory_order_relaxed); auto [dirtyPiece, dirtyThreats] = accumulatorStack.push(); pos.do_move(move, st, givesCheck, dirtyPiece, dirtyThreats, &tt, &sharedHistory); if (ss != nullptr) { ss->currentMove = move; ss->continuationHistory = &continuationHistory[ss->inCheck][capture][dirtyPiece.pc][move.to_sq()]; ss->continuationCorrectionHistory = &continuationCorrectionHistory[dirtyPiece.pc][move.to_sq()]; } } void Search::Worker::do_null_move(Position& pos, StateInfo& st, Stack* const ss) { pos.do_null_move(st); ss->currentMove = Move::null(); ss->continuationHistory = &continuationHistory[0][0][NO_PIECE][0]; ss->continuationCorrectionHistory = &continuationCorrectionHistory[NO_PIECE][0]; } void Search::Worker::undo_move(Position& pos, const Move move) { pos.undo_move(move); accumulatorStack.pop(); } void Search::Worker::undo_null_move(Position& pos) { pos.undo_null_move(); } // Reset histories, usually before a new game void Search::Worker::clear() { mainHistory.fill(0); captureHistory.fill(-678); // Each thread is responsible for clearing their part of shared history sharedHistory.correctionHistory.clear_range(0, numaThreadIdx, numaTotal); sharedHistory.pawnHistory.clear_range(-1238, numaThreadIdx, numaTotal); ttMoveHistory = 0; for (auto& to : continuationCorrectionHistory) for (auto& h : to) h.fill(6); for (bool inCheck : {false, true}) for (StatsType c : {NoCaptures, Captures}) for (auto& to : continuationHistory[inCheck][c]) for (auto& h : to) h.fill(-523); for (size_t i = 1; i < 
reductions.size(); ++i) reductions[i] = int(2763 / 128.0 * std::log(i)); refreshTable.clear(networks[numaAccessToken]); } // Main search function for both PV and non-PV nodes template Value Search::Worker::search( Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, bool cutNode) { constexpr bool PvNode = nodeType != NonPV; constexpr bool rootNode = nodeType == Root; const bool allNode = !(PvNode || cutNode); // Dive into quiescence search when the depth reaches zero if (depth <= 0) return qsearch(pos, ss, alpha, beta); // Limit the depth if extensions made it too large depth = std::min(depth, MAX_PLY - 1); // Check if we have an upcoming move that draws by repetition if (!rootNode && alpha < VALUE_DRAW && pos.upcoming_repetition(ss->ply)) { alpha = value_draw(nodes); if (alpha >= beta) return alpha; } assert(-VALUE_INFINITE <= alpha && alpha < beta && beta <= VALUE_INFINITE); assert(PvNode || (alpha == beta - 1)); assert(0 < depth && depth < MAX_PLY); assert(!(PvNode && cutNode)); Move pv[MAX_PLY + 1]; StateInfo st; Key posKey; Move move, excludedMove, bestMove; Depth extension, newDepth; Value bestValue, value, eval, maxValue, probCutBeta; bool givesCheck, improving, priorCapture, opponentWorsening; bool capture, ttCapture; int priorReduction; Piece movedPiece; SearchedList capturesSearched; SearchedList quietsSearched; // Step 1. Initialize node ss->inCheck = pos.checkers(); priorCapture = pos.captured_piece(); Color us = pos.side_to_move(); ss->moveCount = 0; bestValue = -VALUE_INFINITE; maxValue = VALUE_INFINITE; ss->followPV = rootNode || ((ss - 1)->followPV && static_cast(ss->ply - 1) < lastIterationPV.size() && (ss - 1)->currentMove == lastIterationPV[ss->ply - 1]); // Check for the available remaining time if (is_mainthread()) main_manager()->check_time(*this); // Used to send selDepth info to GUI (selDepth counts from 1, ply from 0) if (PvNode && selDepth < ss->ply + 1) selDepth = ss->ply + 1; if (!rootNode) { // Step 2. 
Check for aborted search and immediate draw if (threads.stop.load(std::memory_order_relaxed) || pos.is_draw(ss->ply) || ss->ply >= MAX_PLY) return (ss->ply >= MAX_PLY && !ss->inCheck) ? evaluate(pos) : value_draw(nodes); // Step 3. Mate distance pruning. Even if we mate at the next move our score // would be at best mate_in(ss->ply + 1), but if alpha is already bigger because // a shorter mate was found upward in the tree then there is no need to search // because we will never beat the current alpha. Same logic but with reversed // signs apply also in the opposite condition of being mated instead of giving // mate. In this case, return a fail-high score. alpha = std::max(mated_in(ss->ply), alpha); beta = std::min(mate_in(ss->ply + 1), beta); if (alpha >= beta) return alpha; } assert(0 <= ss->ply && ss->ply < MAX_PLY); Square prevSq = ((ss - 1)->currentMove).is_ok() ? ((ss - 1)->currentMove).to_sq() : SQ_NONE; bestMove = Move::none(); priorReduction = (ss - 1)->reduction; (ss - 1)->reduction = 0; ss->statScore = 0; (ss + 2)->cutoffCnt = 0; // Step 4. Transposition table lookup excludedMove = ss->excludedMove; posKey = pos.key(); auto [ttHit, ttData, ttWriter] = tt.probe(posKey); // Need further processing of the saved data ss->ttHit = ttHit; ttData.move = rootNode ? rootMoves[pvIdx].pv[0] : ttHit ? ttData.move : Move::none(); ttData.value = ttHit ? value_from_tt(ttData.value, ss->ply, pos.rule50_count()) : VALUE_NONE; ss->ttPv = excludedMove ? ss->ttPv : PvNode || (ttHit && ttData.is_pv); ttCapture = ttData.move && pos.capture_stage(ttData.move); // Step 6. 
Static evaluation of the position Value unadjustedStaticEval = VALUE_NONE; const auto correctionValue = correction_value(*this, pos, ss); // Skip early pruning when in check if (ss->inCheck) ss->staticEval = eval = (ss - 2)->staticEval; else if (excludedMove) unadjustedStaticEval = eval = ss->staticEval; else if (ss->ttHit) { // Never assume anything about values stored in TT unadjustedStaticEval = ttData.eval; if (!is_valid(unadjustedStaticEval)) unadjustedStaticEval = evaluate(pos); ss->staticEval = eval = to_corrected_static_eval(unadjustedStaticEval, correctionValue); // ttValue can be used as a better position evaluation if (is_valid(ttData.value) && (ttData.bound & (ttData.value > eval ? BOUND_LOWER : BOUND_UPPER))) eval = ttData.value; } else { unadjustedStaticEval = evaluate(pos); ss->staticEval = eval = to_corrected_static_eval(unadjustedStaticEval, correctionValue); // Static evaluation is saved as it was before adjustment by correction history ttWriter.write(posKey, VALUE_NONE, ss->ttPv, BOUND_NONE, DEPTH_UNSEARCHED, Move::none(), unadjustedStaticEval, tt.generation()); } // Set up the improving flag, which is true if current static evaluation is // bigger than the previous static evaluation at our turn (if we were in // check at our previous move we go back until we weren't in check) and is // false otherwise. The improving flag is used in various pruning heuristics. // Similarly, opponentWorsening is true if our static evaluation is better // for us than at the last ply. improving = ss->staticEval > (ss - 2)->staticEval; opponentWorsening = ss->staticEval > -(ss - 1)->staticEval; // Hindsight adjustment of reductions based on static evaluation difference. 
if (priorReduction >= 3 && !opponentWorsening) depth++; if (priorReduction >= 2 && depth >= 2 && ss->staticEval + (ss - 1)->staticEval > 195) depth--; // At non-PV nodes we check for an early TT cutoff if (!PvNode && !excludedMove && ttData.depth > depth - (ttData.value <= beta) && is_valid(ttData.value) // Can happen when !ttHit or when access race in probe() && (ttData.bound & (ttData.value >= beta ? BOUND_LOWER : BOUND_UPPER)) && (cutNode == (ttData.value >= beta) || depth > 5)) { // If ttMove is quiet, update move sorting heuristics on TT hit if (ttData.move && ttData.value >= beta) { // Bonus for a quiet ttMove that fails high if (!ttCapture) update_quiet_histories(pos, ss, *this, ttData.move, std::min(119 * depth - 74, 855)); // Extra penalty for early quiet moves of the previous ply if (prevSq != SQ_NONE && (ss - 1)->moveCount < 4 && !priorCapture) update_continuation_histories(ss - 1, pos.piece_on(prevSq), prevSq, -2014); } // Partial workaround for the graph history interaction problem // For high rule50 counts don't produce transposition table cutoffs. if (pos.rule50_count() < 96) { if (depth >= 7 && ttData.move && pos.pseudo_legal(ttData.move) && pos.legal(ttData.move) && !is_decisive(ttData.value)) { pos.do_move(ttData.move, st); Key nextPosKey = pos.key(); auto [ttHitNext, ttDataNext, ttWriterNext] = tt.probe(nextPosKey); pos.undo_move(ttData.move); // Check that the ttValue after the tt move would also trigger a cutoff if (!is_valid(ttDataNext.value)) return ttData.value; if ((ttData.value >= beta) == (-ttDataNext.value >= beta)) return ttData.value; } else return ttData.value; } } // Step 5. 
Tablebases probe if (!rootNode && !excludedMove && tbConfig.cardinality) { int piecesCount = pos.count(); if (piecesCount <= tbConfig.cardinality && (piecesCount < tbConfig.cardinality || depth >= tbConfig.probeDepth) && pos.rule50_count() == 0 && !pos.can_castle(ANY_CASTLING)) { TB::ProbeState err; TB::WDLScore wdl = Tablebases::probe_wdl(pos, &err); // Force check of time on the next occasion if (is_mainthread()) main_manager()->callsCnt = 0; if (err != TB::ProbeState::FAIL) { // Preferable over fetch_add to avoid locking instructions tbHits.store(tbHits.load(std::memory_order_relaxed) + 1, std::memory_order_relaxed); int drawScore = tbConfig.useRule50 ? 1 : 0; Value tbValue = VALUE_TB - ss->ply; // Use the range VALUE_TB to VALUE_TB_WIN_IN_MAX_PLY to score value = wdl < -drawScore ? -tbValue : wdl > drawScore ? tbValue : VALUE_DRAW + 2 * wdl * drawScore; Bound b = wdl < -drawScore ? BOUND_UPPER : wdl > drawScore ? BOUND_LOWER : BOUND_EXACT; if (b == BOUND_EXACT || (b == BOUND_LOWER ? value >= beta : value <= alpha)) { ttWriter.write(posKey, value_to_tt(value, ss->ply), ss->ttPv, b, std::min(MAX_PLY - 1, depth + 6), Move::none(), VALUE_NONE, tt.generation()); return value; } if (PvNode) { if (b == BOUND_LOWER) bestValue = value, alpha = std::max(alpha, bestValue); else maxValue = value; } } } } if (ss->inCheck) goto moves_loop; // Use static evaluation difference to improve quiet move ordering if (((ss - 1)->currentMove).is_ok() && !(ss - 1)->inCheck && !priorCapture) { int evalDiff = std::clamp(-int((ss - 1)->staticEval + ss->staticEval), -214, 171) + 60; mainHistory[~us][((ss - 1)->currentMove).raw()] << evalDiff * 10; if (!ttHit && type_of(pos.piece_on(prevSq)) != PAWN && ((ss - 1)->currentMove).type_of() != PROMOTION) sharedHistory.pawn_entry(pos)[pos.piece_on(prevSq)][prevSq] << evalDiff * 12; } // Step 7. Razoring // If eval is really low, skip search entirely and return the qsearch value. // For PvNodes, we must have a guard against mates being returned. 
if (!PvNode && eval < alpha - 502 - 306 * depth * depth) return qsearch(pos, ss, alpha, beta); // Step 8. Futility pruning: child node // The depth condition is important for mate finding. { auto futility_margin = [&](Depth d) { Value futilityMult = 76 - 21 * !ss->ttHit; return futilityMult * d - (2686 * improving + 362 * opponentWorsening) * futilityMult / 1024 // + std::abs(correctionValue) / 180600; }; if (!ss->ttPv && depth < 15 && eval - futility_margin(depth) >= beta && eval >= beta && (!ttData.move || ttCapture) && !is_loss(beta) && !is_win(eval)) return (2 * beta + eval) / 3; } // Step 9. Null move search with verification search if (cutNode && ss->staticEval >= beta - 16 * depth - 53 * improving + 378 && !excludedMove && pos.non_pawn_material(us) && ss->ply >= nmpMinPly && !is_loss(beta)) { assert((ss - 1)->currentMove != Move::null()); // Null move dynamic reduction based on depth Depth R = 7 + depth / 3; do_null_move(pos, st, ss); Value nullValue = -search(pos, ss + 1, -beta, -beta + 1, depth - R, false); undo_null_move(pos); // Do not return unproven mate or TB scores if (nullValue >= beta && !is_win(nullValue)) { if (nmpMinPly || depth < 16) return nullValue; assert(!nmpMinPly); // Recursive verification is not allowed // Do verification search at high depths, with null move pruning disabled // until ply exceeds nmpMinPly. nmpMinPly = ss->ply + 3 * (depth - R) / 4; Value v = search(pos, ss, beta - 1, beta, depth - R, false); nmpMinPly = 0; if (v >= beta) return nullValue; } } improving |= ss->staticEval >= beta; // Step 10. Internal iterative reductions // At sufficient depth, reduce depth for PV/Cut nodes without a TTMove. // (*Scaler) Making IIR more aggressive scales poorly. if (!ss->followPV && !allNode && depth >= 6 && !ttData.move && priorReduction <= 3) depth--; // Step 11. ProbCut // If we have a good enough capture (or queen promotion) and a reduced search // returns a value much above beta, we can (almost) safely prune the previous move. 
probCutBeta = beta + 224 - 61 * improving; if (depth >= 3 && !is_decisive(beta) // If value from transposition table is lower than probCutBeta, don't attempt // probCut there && !(is_valid(ttData.value) && ttData.value < probCutBeta)) { assert(probCutBeta < VALUE_INFINITE && probCutBeta > beta); MovePicker mp(pos, ttData.move, probCutBeta - ss->staticEval, &captureHistory); Depth probCutDepth = depth - 4; while ((move = mp.next_move()) != Move::none()) { assert(move.is_ok()); if (move == excludedMove || !pos.legal(move)) continue; assert(pos.capture_stage(move)); do_move(pos, move, st, ss); // Perform a preliminary qsearch to verify that the move holds value = -qsearch(pos, ss + 1, -probCutBeta, -probCutBeta + 1); // If the qsearch held, perform the regular search if (value >= probCutBeta && probCutDepth > 0) value = -search(pos, ss + 1, -probCutBeta, -probCutBeta + 1, probCutDepth, !cutNode); undo_move(pos, move); if (value >= probCutBeta) { // Save ProbCut data into transposition table ttWriter.write(posKey, value_to_tt(value, ss->ply), ss->ttPv, BOUND_LOWER, probCutDepth + 1, move, unadjustedStaticEval, tt.generation()); if (!is_decisive(value)) return value - (probCutBeta - beta); } } } moves_loop: // When in check, search starts here // Step 12. A small Probcut idea probCutBeta = beta + 416; if ((ttData.bound & BOUND_LOWER) && ttData.depth >= depth - 4 && ttData.value >= probCutBeta && !is_decisive(beta) && is_valid(ttData.value) && !is_decisive(ttData.value)) return probCutBeta; const PieceToHistory* contHist[] = { (ss - 1)->continuationHistory, (ss - 2)->continuationHistory, (ss - 3)->continuationHistory, (ss - 4)->continuationHistory, (ss - 5)->continuationHistory, (ss - 6)->continuationHistory}; MovePicker mp(pos, ttData.move, depth, &mainHistory, &lowPlyHistory, &captureHistory, contHist, &sharedHistory, ss->ply); value = bestValue; int moveCount = 0; // Step 13. Loop through all pseudo-legal moves until no moves remain // or a beta cutoff occurs. 
while ((move = mp.next_move()) != Move::none()) { assert(move.is_ok()); if (move == excludedMove) continue; // Check for legality if (!pos.legal(move)) continue; // At root obey the "searchmoves" option and skip moves not listed in Root // Move List. In MultiPV mode we also skip PV moves that have been already // searched and those of lower "TB rank" if we are in a TB root position. if (rootNode && !std::count(rootMoves.begin() + pvIdx, rootMoves.begin() + pvLast, move)) continue; ss->moveCount = ++moveCount; if (rootNode && is_mainthread() && nodes > 10000000) { main_manager()->updates.onIter( {depth, UCIEngine::move(move, pos.is_chess960()), moveCount + pvIdx}); } if (PvNode) (ss + 1)->pv = nullptr; extension = 0; capture = pos.capture_stage(move); movedPiece = pos.moved_piece(move); givesCheck = pos.gives_check(move); // Calculate new depth for this move newDepth = depth - 1; int delta = beta - alpha; Depth r = reduction(improving, depth, moveCount, delta); // Increase reduction for ttPv nodes (*Scaler) // Larger values scale well if (ss->ttPv) r += 1013; // Step 14. Pruning at shallow depths. // Depth conditions are important for mate finding. 
if (!rootNode && pos.non_pawn_material(us) && !is_loss(bestValue)) { // Skip quiet moves if movecount exceeds our FutilityMoveCount threshold if (moveCount >= (3 + depth * depth) / (2 - improving)) mp.skip_quiet_moves(); // Reduced depth of the next LMR search int lmrDepth = newDepth - r / 1024; if (capture || givesCheck) { Piece capturedPiece = pos.piece_on(move.to_sq()); int captHist = captureHistory[movedPiece][move.to_sq()][type_of(capturedPiece)]; // Futility pruning for captures if (!givesCheck && lmrDepth < 7) { Value futilityValue = ss->staticEval + 218 + 223 * lmrDepth + PieceValue[capturedPiece] + 131 * captHist / 1024; if (futilityValue <= alpha) continue; } // SEE based pruning for captures and checks // Avoid pruning sacrifices of our last piece for stalemate int margin = std::max(167 * depth + captHist * 34 / 1024, 0); if ((alpha >= VALUE_DRAW || pos.non_pawn_material(us) != PieceValue[movedPiece]) && !pos.see_ge(move, -margin)) continue; } else if (!ss->followPV || !PvNode) { int history = (*contHist[0])[movedPiece][move.to_sq()] + (*contHist[1])[movedPiece][move.to_sq()] + sharedHistory.pawn_entry(pos)[movedPiece][move.to_sq()]; // Continuation history based pruning if (history < -4097 * depth) continue; history += 71 * mainHistory[us][move.raw()] / 32; // (*Scaler): Generally, lower divisors scales well lmrDepth += history / 2995; Value futilityValue = ss->staticEval + 42 + 151 * !bestMove + 120 * lmrDepth + 86 * (ss->staticEval > alpha); // Futility pruning: parent node // (*Scaler): Generally, more frequent futility pruning // scales well if (!ss->inCheck && lmrDepth < 13 && futilityValue <= alpha) { if (bestValue <= futilityValue && !is_decisive(bestValue) && !is_win(futilityValue)) bestValue = futilityValue; continue; } lmrDepth = std::max(lmrDepth, 0); // Prune moves with negative SEE if (!pos.see_ge(move, -25 * lmrDepth * lmrDepth)) continue; } } // Step 15. Extensions // Singular extension search. 
If all moves but one // fail low on a search of (alpha-s, beta-s), and just one fails high on // (alpha, beta), then that move is singular and should be extended. To // verify this we do a reduced search on the position excluding the ttMove // and if the result is lower than ttValue minus a margin, then we will // extend the ttMove. Recursive singular search is avoided. // (*Scaler) Generally, higher singularBeta (i.e closer to ttValue) // and lower extension margins scale well. if (!rootNode && move == ttData.move && !excludedMove && depth >= 6 + ss->ttPv && is_valid(ttData.value) && !is_decisive(ttData.value) && (ttData.bound & BOUND_LOWER) && ttData.depth >= depth - 3 && !is_shuffling(move, ss, pos)) { Value singularBeta = ttData.value - (60 + 66 * (ss->ttPv && !PvNode)) * depth / 55; Depth singularDepth = newDepth / 2; ss->excludedMove = move; value = search(pos, ss, singularBeta - 1, singularBeta, singularDepth, cutNode); ss->excludedMove = Move::none(); if (value < singularBeta) { int corrValAdj = std::abs(correctionValue) / 210590; int doubleMargin = -4 + 212 * PvNode - 182 * !ttCapture - corrValAdj - 906 * ttMoveHistory / 116517 - (ss->ply > rootDepth) * 44; int tripleMargin = 73 + 320 * PvNode - 218 * !ttCapture + 92 * ss->ttPv - corrValAdj - (ss->ply > rootDepth) * 45; extension = 1 + (value < singularBeta - doubleMargin) + (value < singularBeta - tripleMargin); depth++; } // Multi-cut pruning // Our ttMove is assumed to fail high based on the bound of the TT entry, // and if after excluding the ttMove with a reduced search we fail high // over the original beta, we assume this expected cut-node is not // singular (multiple moves fail high), and we can prune the whole // subtree by returning a softbound. 
else if (value >= beta && !is_decisive(value)) { ttMoveHistory << std::max(-424 - 107 * depth, -3375); return value; } // Negative extensions // If other moves failed high over (ttValue - margin) without the // ttMove on a reduced search, but we cannot do multi-cut because // (ttValue - margin) is lower than the original beta, we do not know // if the ttMove is singular or can do a multi-cut, so we reduce the // ttMove in favor of other moves based on some conditions: // If the ttMove is assumed to fail high over current beta else if (ttData.value >= beta) extension = -3; // If we are on a cutNode but the ttMove is not assumed to fail high // over current beta else if (cutNode) extension = -2; } // Step 16. Make the move do_move(pos, move, st, givesCheck, ss); // Add extension to new depth newDepth += extension; uint64_t nodeCount = rootNode ? uint64_t(nodes) : 0; // Decrease reduction for PvNodes (*Scaler) if (ss->ttPv) r -= 2819 + PvNode * 973 + (ttData.value > alpha) * 905 + (ttData.depth >= depth) * (935 + cutNode * 959); r += 691; // Base reduction offset to compensate for other tweaks r -= moveCount * 65; r -= std::abs(correctionValue) / 25600; // Increase reduction for cut nodes if (cutNode) r += 3611 + 985 * !ttData.move; // Increase reduction if ttMove is a capture if (ttCapture) r += 1054; // Increase reduction if next ply has a lot of fail high if ((ss + 1)->cutoffCnt > 1) r += 251 + 1124 * ((ss + 1)->cutoffCnt > 2) + 1042 * allNode; // For first picked move (ttMove) reduce reduction if (move == ttData.move) r -= 2239; if (capture) ss->statScore = 863 * int(PieceValue[pos.captured_piece()]) / 128 + captureHistory[movedPiece][move.to_sq()][type_of(pos.captured_piece())]; else ss->statScore = 2 * mainHistory[us][move.raw()] + (*contHist[0])[movedPiece][move.to_sq()] + (*contHist[1])[movedPiece][move.to_sq()]; // Decrease/increase reduction for moves with a good/bad history r -= ss->statScore * 428 / 4096; // Scale up reductions for expected ALL nodes if 
(allNode) r += r * 273 / (256 * depth + 260); // Step 17. Late moves reduction / extension (LMR) if (depth >= 2 && moveCount > 1) { // In general we want to cap the LMR depth search at newDepth, but when // reduction is negative, we allow this move a limited search extension // beyond the first move depth. // To prevent problems when the max value is less than the min value, // std::clamp has been replaced by a more robust implementation. Depth d = std::max(1, std::min(newDepth - r / 1024, newDepth + 2)) + PvNode; ss->reduction = newDepth - d; value = -search(pos, ss + 1, -(alpha + 1), -alpha, d, true); ss->reduction = 0; // Do a full-depth search when reduced LMR search fails high // (*Scaler) Shallower searches here don't scale well if (value > alpha) { // Adjust full-depth search based on LMR results - if the result was // good enough search deeper, if it was bad enough search shallower. const bool doDeeperSearch = d < newDepth && value > bestValue + 48; const bool doShallowerSearch = value < bestValue + 9; newDepth += doDeeperSearch - doShallowerSearch; if (newDepth > d) value = -search(pos, ss + 1, -(alpha + 1), -alpha, newDepth, !cutNode); // Post LMR continuation history updates update_continuation_histories(ss, movedPiece, move.to_sq(), 1426); } } // Step 18. Full-depth search when LMR is skipped else if (!PvNode || moveCount > 1) { // Increase reduction if ttMove is not present if (!ttData.move) r += 1057; // Note that if expected reduction is high, we reduce search depth here value = -search(pos, ss + 1, -(alpha + 1), -alpha, newDepth - (r > 4628) - (r > 5772 && newDepth > 2), !cutNode); } // For PV nodes only, do a full PV search on the first move or after a fail high, // otherwise let the parent node fail low with value <= alpha and try another move. if (PvNode && (moveCount == 1 || value > alpha)) { (ss + 1)->pv = pv; (ss + 1)->pv[0] = Move::none(); // Extend move from transposition table if we are about to dive into qsearch. 
// decisive score handling improves mate finding and retrograde analysis. if (move == ttData.move && ((is_valid(ttData.value) && is_decisive(ttData.value) && ttData.depth > 0) || ttData.depth > 1)) newDepth = std::max(newDepth, 1); value = -search(pos, ss + 1, -beta, -alpha, newDepth, false); } // Step 19. Undo move undo_move(pos, move); assert(value > -VALUE_INFINITE && value < VALUE_INFINITE); // Step 20. Check for a new best move // Finished searching the move. If a stop occurred, the return value of // the search cannot be trusted, and we return immediately without updating // best move, principal variation nor transposition table. if (threads.stop.load(std::memory_order_relaxed)) return VALUE_ZERO; if (rootNode) { RootMove& rm = *std::find(rootMoves.begin(), rootMoves.end(), move); rm.effort += nodes - nodeCount; rm.averageScore = rm.averageScore != -VALUE_INFINITE ? (value + rm.averageScore) / 2 : value; rm.meanSquaredScore = rm.meanSquaredScore != -VALUE_INFINITE * VALUE_INFINITE ? (value * std::abs(value) + rm.meanSquaredScore) / 2 : value * std::abs(value); // PV move or new best move? if (moveCount == 1 || value > alpha) { rm.score = rm.uciScore = value; rm.selDepth = selDepth; rm.scoreLowerbound = rm.scoreUpperbound = false; if (value >= beta) { rm.scoreLowerbound = true; rm.uciScore = beta; } else if (value <= alpha) { rm.scoreUpperbound = true; rm.uciScore = alpha; } rm.pv.resize(1); assert((ss + 1)->pv); for (Move* m = (ss + 1)->pv; *m != Move::none(); ++m) rm.pv.push_back(*m); // We record how often the best move has been changed in each iteration. // This information is used for time management. In MultiPV mode, // we must take care to only do this for the first PV line. if (moveCount > 1 && !pvIdx) ++bestMoveChanges; } else // All other moves but the PV, are set to the lowest value: this // is not a problem when sorting because the sort is stable and the // move position in the list is preserved - just the PV is pushed up. 
rm.score = -VALUE_INFINITE; } // In case we have an alternative move equal in eval to the current bestmove, // promote it to bestmove by pretending it just exceeds alpha (but not beta). int inc = (value == bestValue && ss->ply + 2 >= rootDepth && (int(nodes) & 14) == 0 && !is_win(std::abs(value) + 1)); if (value + inc > bestValue) { bestValue = value; if (value + inc > alpha) { bestMove = move; if (PvNode && !rootNode) // Update pv even in fail-high case update_pv(ss->pv, move, (ss + 1)->pv); if (value >= beta) { // (*Scaler) Infrequent and small updates scale well ss->cutoffCnt += (extension < 2) || PvNode; assert(value >= beta); // Fail high break; } // Reduce other moves if we have found at least one score improvement if (depth > 2 && depth < 14 && !is_decisive(value)) depth -= 2; assert(depth > 0); alpha = value; // Update alpha! Always alpha < beta } } // If the move is worse than some previously searched move, // remember it, to update its stats later. if (move != bestMove && moveCount <= SEARCHEDLIST_CAPACITY) { if (capture) capturesSearched.push_back(move); else quietsSearched.push_back(move); } } // Step 21. Check for mate and stalemate // All legal moves have been searched and if there are no legal moves, it // must be a mate or a stalemate. If we are in a singular extension search then // return a fail low score. assert(moveCount || !ss->inCheck || excludedMove || !MoveList(pos).size()); // Adjust best value for fail high cases if (bestValue >= beta && !is_decisive(bestValue) && !is_decisive(alpha)) bestValue = (bestValue * depth + beta) / (depth + 1); if (!moveCount) bestValue = excludedMove ? alpha : ss->inCheck ? mated_in(ss->ply) : VALUE_DRAW; // If there is a move that produces search value greater than alpha, // we update the stats of searched moves. else if (bestMove) { update_all_stats(pos, ss, *this, bestMove, prevSq, quietsSearched, capturesSearched, depth, ttData.move); if (!PvNode) ttMoveHistory << (bestMove == ttData.move ? 
805 : -787); } // Bonus for prior quiet countermove that caused the fail low else if (!priorCapture && prevSq != SQ_NONE) { int bonusScale = -232; bonusScale -= (ss - 1)->statScore / 108; bonusScale += std::min(59 * depth, 454); bonusScale += 169 * ((ss - 1)->moveCount > 8); bonusScale += 145 * (!ss->inCheck && bestValue <= ss->staticEval - 110); bonusScale += 154 * (!(ss - 1)->inCheck && bestValue <= -(ss - 1)->staticEval - 73); bonusScale = std::max(bonusScale, 0); // scaledBonus ranges from 0 to roughly 2.3M, overflows happen for multipliers larger than 900 const int scaledBonus = std::min(135 * depth - 80, 1400) * bonusScale; update_continuation_histories(ss - 1, pos.piece_on(prevSq), prevSq, scaledBonus * 221 / 16384); mainHistory[~us][((ss - 1)->currentMove).raw()] << scaledBonus * 235 / 32768; if (type_of(pos.piece_on(prevSq)) != PAWN && ((ss - 1)->currentMove).type_of() != PROMOTION) sharedHistory.pawn_entry(pos)[pos.piece_on(prevSq)][prevSq] << scaledBonus * 290 / 8192; } // Bonus for prior capture countermove that caused the fail low else if (priorCapture && prevSq != SQ_NONE) { Piece capturedPiece = pos.captured_piece(); assert(capturedPiece != NO_PIECE); captureHistory[pos.piece_on(prevSq)][prevSq][type_of(capturedPiece)] << 1018; } if (PvNode) bestValue = std::min(bestValue, maxValue); // If no good move is found and the previous position was ttPv, then the previous // opponent move is probably good and the new position is added to the search tree. if (bestValue <= alpha) ss->ttPv = ss->ttPv || (ss - 1)->ttPv; // Write gathered information in transposition table. Note that the // static evaluation is saved as it was before correction history. if (!excludedMove && !(rootNode && pvIdx)) ttWriter.write(posKey, value_to_tt(bestValue, ss->ply), ss->ttPv, bestValue >= beta ? BOUND_LOWER : PvNode && bestMove ? BOUND_EXACT : BOUND_UPPER, moveCount != 0 ? 
depth : std::min(MAX_PLY - 1, depth + 6), bestMove, unadjustedStaticEval, tt.generation());

    // Adjust correction history if the best move is not a capture
    // and the error direction matches whether we are above/below bounds.
    if (!ss->inCheck && !(bestMove && pos.capture(bestMove))
        && (bestValue > ss->staticEval) == bool(bestMove))
    {
        auto bonus = std::clamp(int(bestValue - ss->staticEval) * depth * (bestMove ? 12 : 17) / 128,
                                -CORRECTION_HISTORY_LIMIT / 4, CORRECTION_HISTORY_LIMIT / 4);
        update_correction_history(pos, ss, *this, 1069 * bonus / 1024);
    }

    assert(bestValue > -VALUE_INFINITE && bestValue < VALUE_INFINITE);

    return bestValue;
}


// Quiescence search function, which is called by the main search function with
// depth zero, or recursively with further decreasing depth. With depth <= 0, we
// "should" be using static eval only, but tactical moves may confuse the static eval.
// To fight this horizon effect, we implement this qsearch of tactical moves.
// See https://www.chessprogramming.org/Horizon_Effect
// and https://www.chessprogramming.org/Quiescence_Search
//
// Parameters:
//   pos          - position to search; moves are made and unmade on it in place
//   ss           - search stack entry of the current ply (ss - 1 and ss + 1 are also accessed)
//   alpha, beta  - search window, asserted to satisfy alpha < beta
// Returns the best value found, asserted to lie strictly inside
// (-VALUE_INFINITE, VALUE_INFINITE).
//
// NOTE(review): the template parameter list after `template`, and the explicit template
// arguments of the recursive qsearch call, of MoveList and of shift further down, appear
// to have been lost when this text was extracted. The body uses `nodeType` as a
// compile-time constant - restore from upstream before compiling.
template Value Search::Worker::qsearch(Position& pos, Stack* ss, Value alpha, Value beta) {

    static_assert(nodeType != Root);
    constexpr bool PvNode = nodeType == PV;

    assert(alpha >= -VALUE_INFINITE && alpha < beta && beta <= VALUE_INFINITE);
    assert(PvNode || (alpha == beta - 1));

    // Check if we have an upcoming move that draws by repetition
    if (alpha < VALUE_DRAW && pos.upcoming_repetition(ss->ply))
    {
        alpha = value_draw(nodes);
        if (alpha >= beta)
            return alpha;
    }

    Move      pv[MAX_PLY + 1];
    StateInfo st;

    Key   posKey;
    Move  move, bestMove;
    Value bestValue, value, futilityBase;
    bool  pvHit, givesCheck, capture;
    int   moveCount;

    // Step 1. Initialize node
    if (PvNode)
    {
        (ss + 1)->pv = pv;
        ss->pv[0]    = Move::none();
    }

    bestMove    = Move::none();
    ss->inCheck = pos.checkers();
    moveCount   = 0;

    // Used to send selDepth info to GUI (selDepth counts from 1, ply from 0)
    if (PvNode && selDepth < ss->ply + 1)
        selDepth = ss->ply + 1;

    // Step 2. Check for an immediate draw or maximum ply reached
    if (pos.is_draw(ss->ply) || ss->ply >= MAX_PLY)
        return (ss->ply >= MAX_PLY && !ss->inCheck) ? evaluate(pos) : VALUE_DRAW;

    assert(0 <= ss->ply && ss->ply < MAX_PLY);

    // Step 3. Transposition table lookup
    posKey                         = pos.key();
    auto [ttHit, ttData, ttWriter] = tt.probe(posKey);
    // Need further processing of the saved data
    ss->ttHit    = ttHit;
    ttData.move  = ttHit ? ttData.move : Move::none();
    ttData.value = ttHit ? value_from_tt(ttData.value, ss->ply, pos.rule50_count()) : VALUE_NONE;
    pvHit        = ttHit && ttData.is_pv;

    // At non-PV nodes we check for an early TT cutoff
    if (!PvNode && ttData.depth >= DEPTH_QS
        && is_valid(ttData.value)  // Can happen when !ttHit or when access race in probe()
        && (ttData.bound & (ttData.value >= beta ? BOUND_LOWER : BOUND_UPPER)))
        return ttData.value;

    // Step 4. Static evaluation of the position
    Value unadjustedStaticEval = VALUE_NONE;
    if (ss->inCheck)
        // No stand pat while in check: -VALUE_INFINITE doubles as the
        // "no legal move searched yet" marker tested in Step 9.
        bestValue = futilityBase = -VALUE_INFINITE;
    else
    {
        const auto correctionValue = correction_value(*this, pos, ss);

        if (ss->ttHit)
        {
            // Never assume anything about values stored in TT
            unadjustedStaticEval = ttData.eval;
            if (!is_valid(unadjustedStaticEval))
                unadjustedStaticEval = evaluate(pos);
            ss->staticEval = bestValue =
              to_corrected_static_eval(unadjustedStaticEval, correctionValue);

            // ttValue can be used as a better position evaluation
            if (is_valid(ttData.value) && !is_decisive(ttData.value)
                && (ttData.bound & (ttData.value > bestValue ? BOUND_LOWER : BOUND_UPPER)))
                bestValue = ttData.value;
        }
        else
        {
            unadjustedStaticEval = evaluate(pos);
            ss->staticEval       = bestValue =
              to_corrected_static_eval(unadjustedStaticEval, correctionValue);
        }

        // Stand pat. Return immediately if static value is at least beta
        if (bestValue >= beta)
        {
            // Non-decisive fail-high values are pulled halfway towards beta
            if (!is_decisive(bestValue))
                bestValue = (bestValue + beta) / 2;

            // Only positions without a TT hit are recorded here
            if (!ss->ttHit)
                ttWriter.write(posKey, value_to_tt(bestValue, ss->ply), false, BOUND_LOWER,
                               DEPTH_UNSEARCHED, Move::none(), unadjustedStaticEval,
                               tt.generation());
            return bestValue;
        }

        if (bestValue > alpha)
            alpha = bestValue;

        futilityBase = ss->staticEval + 328;
    }

    const PieceToHistory* contHist[] = {(ss - 1)->continuationHistory};
    Square prevSq = ((ss - 1)->currentMove).is_ok() ? ((ss - 1)->currentMove).to_sq() : SQ_NONE;

    // Initialize a MovePicker object for the current position, and prepare to search
    // the moves. We presently use two stages of move generator in quiescence search:
    // captures, or evasions only when in check.
    MovePicker mp(pos, ttData.move, DEPTH_QS, &mainHistory, &lowPlyHistory, &captureHistory,
                  contHist, &sharedHistory, ss->ply);

    // Step 5. Loop through all pseudo-legal moves until no moves remain or a beta
    // cutoff occurs.
    while ((move = mp.next_move()) != Move::none())
    {
        assert(move.is_ok());

        if (!pos.legal(move))
            continue;

        givesCheck = pos.gives_check(move);
        capture    = pos.capture_stage(move);

        moveCount++;

        // Step 6. Pruning
        if (!is_loss(bestValue))
        {
            // Futility pruning and moveCount pruning
            if (!givesCheck && move.to_sq() != prevSq && !is_loss(futilityBase)
                && move.type_of() != PROMOTION)
            {
                if (moveCount > 2)
                    continue;

                Value futilityValue = futilityBase + PieceValue[pos.piece_on(move.to_sq())];

                // If static eval + value of piece we are going to capture is
                // much lower than alpha, we can prune this move.
                if (futilityValue <= alpha)
                {
                    bestValue = std::max(bestValue, futilityValue);
                    continue;
                }

                // If static exchange evaluation is low enough
                // we can prune this move.
                if (!pos.see_ge(move, alpha - futilityBase))
                {
                    bestValue = std::max(bestValue, std::min(alpha, futilityBase));
                    continue;
                }
            }

            // Skip non-captures
            if (!capture)
                continue;

            // Do not search moves with bad enough SEE values
            if (!pos.see_ge(move, -73))
                continue;
        }

        // Step 7. Make and search the move
        do_move(pos, move, st, givesCheck, ss);

        value = -qsearch(pos, ss + 1, -beta, -alpha);
        undo_move(pos, move);

        assert(value > -VALUE_INFINITE && value < VALUE_INFINITE);

        // Step 8. Check for a new best move
        if (value > bestValue)
        {
            bestValue = value;

            if (value > alpha)
            {
                bestMove = move;

                if (PvNode)  // Update pv even in fail-high case
                    update_pv(ss->pv, move, (ss + 1)->pv);

                if (value < beta)  // Update alpha here!
                    alpha = value;
                else
                    break;  // Fail high
            }
        }
    }

    // Step 9. Check for mate
    // All legal moves have been searched. A special case: if we are
    // in check and no legal moves were found, it is checkmate.
    if (ss->inCheck && bestValue == -VALUE_INFINITE)
    {
        assert(!MoveList(pos).size());
        return mated_in(ss->ply);  // Plies to mate from the root
    }

    if (!is_decisive(bestValue) && bestValue > beta)
        bestValue = (bestValue + beta) / 2;

    // Stalemate detection: only attempted when not in check, no move was counted,
    // the side to move has no non-pawn material and the last captured piece was
    // a rook or better.
    Color us = pos.side_to_move();
    if (!ss->inCheck && !moveCount && !pos.non_pawn_material(us)
        && type_of(pos.captured_piece()) >= ROOK)
    {
        if (!((us == WHITE ? shift(pos.pieces(us, PAWN)) : shift(pos.pieces(us, PAWN)))
              & ~pos.pieces()))  // no pawn pushes available
        {
            // checkersBB is overwritten temporarily and reset to 0 afterwards
            pos.state()->checkersBB = Rank1BB;  // search for legal king-moves only
            if (!MoveList(pos).size())          // stalemate
                bestValue = VALUE_DRAW;
            pos.state()->checkersBB = 0;
        }
    }

    // Save gathered info in transposition table. The static evaluation
    // is saved as it was before adjustment by correction history.
    ttWriter.write(posKey, value_to_tt(bestValue, ss->ply), pvHit,
                   bestValue >= beta ? BOUND_LOWER : BOUND_UPPER, DEPTH_QS, bestMove,
                   unadjustedStaticEval, tt.generation());

    assert(bestValue > -VALUE_INFINITE && bestValue < VALUE_INFINITE);

    return bestValue;
}

// Computes a reduction value from the precomputed reductions[] table entries for
// depth d and move number mn, lowered in proportion to delta / rootDelta, raised by
// a fraction of the table product when i is false, plus a constant offset.
// NOTE(review): `i` is presumably the caller's "improving" flag - confirm at call sites.
Depth Search::Worker::reduction(bool i, Depth d, int mn, int delta) const {
    int reductionScale = reductions[d] * reductions[mn];
    return reductionScale - delta * 585 / rootDelta + !i * reductionScale * 206 / 512 + 1133;
}

// elapsed() returns the time elapsed since the search started. If the
// 'nodestime' option is enabled, it will return the count of nodes searched
// instead. This function is called to check whether the search should be
// stopped based on predefined thresholds like time limits or nodes searched.
//
// elapsed_time() returns the actual time elapsed since the start of the search.
// This function is intended for use only when printing PV outputs, and not used
// for making decisions within the search algorithm itself.
TimePoint Search::Worker::elapsed() const {
    return main_manager()->tm.elapsed([this]() { return threads.nodes_searched(); });
}

TimePoint Search::Worker::elapsed_time() const { return main_manager()->tm.elapsed_time(); }

// Forwards to Eval::evaluate with this worker's network replica, accumulator stack,
// refresh table and the optimism value of the side to move.
Value Search::Worker::evaluate(const Position& pos) {
    return Eval::evaluate(networks[numaAccessToken], pos, accumulatorStack, refreshTable,
                          optimism[pos.side_to_move()]);
}

namespace {
// Adjusts a mate or TB score from "plies to mate from the root" to
// "plies to mate from the current position". Standard scores are unchanged.
// The function is called before storing a value in the transposition table.
Value value_to_tt(Value v, int ply) { return is_win(v) ? v + ply : is_loss(v) ? v - ply : v; }


// Inverse of value_to_tt(): it adjusts a mate or TB score from the transposition
// table (which refers to the plies to mate/be mated from current position) to
// "plies to mate/be mated (TB win/loss) from the root".
However, to avoid // potentially false mate or TB scores related to the 50 moves rule and the // graph history interaction, we return the highest non-TB score instead. Value value_from_tt(Value v, int ply, int r50c) { if (!is_valid(v)) return VALUE_NONE; // handle TB win or better if (is_win(v)) { // Downgrade a potentially false mate score if (v >= VALUE_MATE_IN_MAX_PLY && VALUE_MATE - v > 100 - r50c) return VALUE_TB_WIN_IN_MAX_PLY - 1; // Downgrade a potentially false TB score. if (VALUE_TB - v > 100 - r50c) return VALUE_TB_WIN_IN_MAX_PLY - 1; return v - ply; } // handle TB loss or worse if (is_loss(v)) { // Downgrade a potentially false mate score. if (v <= VALUE_MATED_IN_MAX_PLY && VALUE_MATE + v > 100 - r50c) return VALUE_TB_LOSS_IN_MAX_PLY + 1; // Downgrade a potentially false TB score. if (VALUE_TB + v > 100 - r50c) return VALUE_TB_LOSS_IN_MAX_PLY + 1; return v + ply; } return v; } // Adds current move and appends child pv[] void update_pv(Move* pv, Move move, const Move* childPv) { for (*pv++ = move; childPv && *childPv != Move::none();) *pv++ = *childPv++; *pv = Move::none(); } // Updates stats at the end of search() when a bestMove is found void update_all_stats(const Position& pos, Stack* ss, Search::Worker& workerThread, Move bestMove, Square prevSq, SearchedList& quietsSearched, SearchedList& capturesSearched, Depth depth, Move ttMove) { CapturePieceToHistory& captureHistory = workerThread.captureHistory; Piece movedPiece = pos.moved_piece(bestMove); PieceType capturedPiece; int bonus = std::min(128 * depth - 77, 1529) + 353 * (bestMove == ttMove) + (ss - 1)->statScore / 32; int malus = std::min(882 * depth - 204, 2122); if (!pos.capture_stage(bestMove)) { update_quiet_histories(pos, ss, workerThread, bestMove, bonus * 806 / 1024); int actualMalus = malus * 1113 / 1024; // Decrease stats for all non-best quiet moves for (Move move : quietsSearched) { actualMalus = actualMalus * 977 / 1024; update_quiet_histories(pos, ss, workerThread, move, 
-actualMalus); } } else { // Increase stats for the best move in case it was a capture move capturedPiece = type_of(pos.piece_on(bestMove.to_sq())); captureHistory[movedPiece][bestMove.to_sq()][capturedPiece] << bonus * 1286 / 1024; } // Extra penalty for a quiet early move that was not a TT move in // previous ply when it gets refuted. if (prevSq != SQ_NONE && ((ss - 1)->moveCount == 1 + (ss - 1)->ttHit) && !pos.captured_piece()) update_continuation_histories(ss - 1, pos.piece_on(prevSq), prevSq, -malus * 616 / 1024); // Decrease stats for all non-best capture moves for (Move move : capturesSearched) { movedPiece = pos.moved_piece(move); capturedPiece = type_of(pos.piece_on(move.to_sq())); captureHistory[movedPiece][move.to_sq()][capturedPiece] << -malus * 1559 / 1024; } } // Updates histories of the move pairs formed by moves // at ply -1, -2, -3, -4, and -6 with current move. void update_continuation_histories(Stack* ss, Piece pc, Square to, int bonus) { static constexpr std::array conthist_bonuses = { {{1, 1071}, {2, 753}, {3, 329}, {4, 539}, {5, 124}, {6, 434}}}; // Multipliers for positive history consistency constexpr int CMHCMultipliers[] = {96, 100, 100, 100, 115, 118, 129}; int positiveCount = 0; for (const auto [i, weight] : conthist_bonuses) { // Only update the first 2 continuation histories if we are in check if (ss->inCheck && i > 2) break; if (((ss - i)->currentMove).is_ok()) { auto& historyEntry = (*(ss - i)->continuationHistory)[pc][to]; if (historyEntry > 0) positiveCount++; int multiplier = CMHCMultipliers[positiveCount]; historyEntry << (bonus * weight * multiplier / 131072) + 73 * (i < 2); } } } // Updates move sorting heuristics void update_quiet_histories( const Position& pos, Stack* ss, Search::Worker& workerThread, Move move, int bonus) { Color us = pos.side_to_move(); workerThread.mainHistory[us][move.raw()] << bonus; // Untuned to prevent duplicate effort if (ss->ply < LOW_PLY_HISTORY_SIZE) workerThread.lowPlyHistory[ss->ply][move.raw()] 
<< bonus * 682 / 1024; update_continuation_histories(ss, pos.moved_piece(move), move.to_sq(), bonus * 894 / 1024); workerThread.sharedHistory.pawn_entry(pos)[pos.moved_piece(move)][move.to_sq()] << bonus * (bonus > 0 ? 974 : 543) / 1024; } } // When playing with strength handicap, choose the best move among a set of // RootMoves using a statistical rule dependent on 'level'. Idea by Heinz van Saanen. Move Skill::pick_best(const RootMoves& rootMoves, size_t multiPV) { static PRNG rng(now()); // PRNG sequence should be non-deterministic // RootMoves are already sorted by score in descending order Value topScore = rootMoves[0].score; int delta = std::min(topScore - rootMoves[multiPV - 1].score, int(PawnValue)); int maxScore = -VALUE_INFINITE; double weakness = 120 - 2 * level; // Choose best move. For each move score we add two terms, both dependent on // weakness. One is deterministic and bigger for weaker levels, and one is // random. Then we choose the move with the resulting highest score. for (size_t i = 0; i < multiPV; ++i) { // This is our magic formula int push = int(weakness * int(topScore - rootMoves[i].score) + delta * (rng.rand() % int(weakness))) / 128; if (rootMoves[i].score + push >= maxScore) { maxScore = rootMoves[i].score + push; best = rootMoves[i].pv[0]; } } return best; } // Used to print debug info and, more importantly, to detect // when we are out of available time and thus stop the search. void SearchManager::check_time(Search::Worker& worker) { if (--callsCnt > 0) return; // When using nodes, ensure checking rate is not lower than 0.1% of nodes callsCnt = worker.limits.nodes ? 
std::min(512, int(worker.limits.nodes / 1024)) : 512; static TimePoint lastInfoTime = now(); TimePoint elapsed = tm.elapsed([&worker]() { return worker.threads.nodes_searched(); }); TimePoint tick = worker.limits.startTime + elapsed; if (tick - lastInfoTime >= 1000) { lastInfoTime = tick; dbg_print(); } // We should not stop pondering until told so by the GUI if (ponder) return; if ( // Later we rely on the fact that we can at least use the mainthread previous // root-search score and PV in a multithreaded environment to prove mated-in scores. worker.completedDepth >= 1 && ((worker.limits.use_time_management() && (elapsed > tm.maximum() || stopOnPonderhit)) || (worker.limits.movetime && elapsed >= worker.limits.movetime) || (worker.limits.nodes && worker.threads.nodes_searched() >= worker.limits.nodes))) worker.threads.stop = true; } // Used to correct and extend PVs for moves that have a TB (but not a mate) score. // Keeps the search based PV for as long as it is verified to maintain the game // outcome, truncates afterwards. Finally, extends to mate the PV, providing a // possible continuation (but not a proven mating line). void syzygy_extend_pv(const OptionsMap& options, const Search::LimitsType& limits, Position& pos, RootMove& rootMove, Value& v) { auto t_start = std::chrono::steady_clock::now(); int moveOverhead = int(options["Move Overhead"]); bool rule50 = bool(options["Syzygy50MoveRule"]); // Do not use more than moveOverhead / 2 time, if time management is active auto time_abort = [&t_start, &moveOverhead, &limits]() -> bool { auto t_end = std::chrono::steady_clock::now(); return limits.use_time_management() && 2 * std::chrono::duration(t_end - t_start).count() > moveOverhead; }; std::list sts; // Step 0, do the rootMove, no correction allowed, as needed for MultiPV in TB. 
auto& stRoot = sts.emplace_back(); pos.do_move(rootMove.pv[0], stRoot); int ply = 1; // Step 1, walk the PV to the last position in TB with correct decisive score while (size_t(ply) < rootMove.pv.size()) { Move& pvMove = rootMove.pv[ply]; RootMoves legalMoves; for (const auto& m : MoveList(pos)) legalMoves.emplace_back(m); Tablebases::Config config = Tablebases::rank_root_moves(options, pos, legalMoves, false, time_abort); RootMove& rm = *std::find(legalMoves.begin(), legalMoves.end(), pvMove); if (legalMoves[0].tbRank != rm.tbRank) break; ply++; auto& st = sts.emplace_back(); pos.do_move(pvMove, st); // Do not allow for repetitions or drawing moves along the PV in TB regime if (config.rootInTB && ((rule50 && pos.is_draw(ply)) || pos.is_repetition(ply))) { pos.undo_move(pvMove); ply--; break; } // Full PV shown will thus be validated and end in TB. // If we cannot validate the full PV in time, we do not show it. if (config.rootInTB && time_abort()) break; } // Resize the PV to the correct part rootMove.pv.resize(ply); // Step 2, now extend the PV to mate, as if the user explored syzygy-tables.info // using top ranked moves (minimal DTZ), which gives optimal mates only for simple // endgames e.g. KRvK. while (!(rule50 && pos.is_draw(0))) { if (time_abort()) break; RootMoves legalMoves; for (const auto& m : MoveList(pos)) { auto& rm = legalMoves.emplace_back(m); StateInfo tmpSI; pos.do_move(m, tmpSI); // Give a score of each move to break DTZ ties restricting opponent mobility, // but not giving the opponent a capture. for (const auto& mOpp : MoveList(pos)) rm.tbRank -= pos.capture(mOpp) ? 100 : 1; pos.undo_move(m); } // Mate found if (legalMoves.size() == 0) break; // Sort moves according to their above assigned rank. // This will break ties for moves with equal DTZ in rank_root_moves. 
std::stable_sort( legalMoves.begin(), legalMoves.end(), [](const Search::RootMove& a, const Search::RootMove& b) { return a.tbRank > b.tbRank; }); // The winning side tries to minimize DTZ, the losing side maximizes it Tablebases::Config config = Tablebases::rank_root_moves(options, pos, legalMoves, true, time_abort); // If DTZ is not available we might not find a mate, so we bail out if (!config.rootInTB || config.cardinality > 0) break; ply++; Move& pvMove = legalMoves[0].pv[0]; rootMove.pv.push_back(pvMove); auto& st = sts.emplace_back(); pos.do_move(pvMove, st); } // Finding a draw in this function is an exceptional case, that cannot happen when rule50 is false or // during engine game play, since we have a winning score, and play correctly // with TB support. However, it can be that a position is draw due to the 50 move // rule if it has been been reached on the board with a non-optimal 50 move counter // (e.g. 8/8/6k1/3B4/3K4/4N3/8/8 w - - 54 106 ) which TB with dtz counter rounding // cannot always correctly rank. See also // https://github.com/official-stockfish/Stockfish/issues/5175#issuecomment-2058893495 // We adjust the score to match the found PV. Note that a TB loss score can be // displayed if the engine did not find a drawing move yet, but eventually search // will figure it out (e.g. 1kq5/q2r4/5K2/8/8/8/8/7Q w - - 96 1 ) if (pos.is_draw(0)) v = VALUE_DRAW; // Undo the PV moves for (auto it = rootMove.pv.rbegin(); it != rootMove.pv.rend(); ++it) pos.undo_move(*it); // Inform if we couldn't get a full extension in time if (time_abort()) sync_cout << "info string Syzygy based PV extension requires more time, increase Move Overhead as needed." 
<< sync_endl; } void SearchManager::pv(Search::Worker& worker, const ThreadPool& threads, const TranspositionTable& tt, Depth depth) { const auto nodes = threads.nodes_searched(); auto& rootMoves = worker.rootMoves; auto& pos = worker.rootPos; size_t pvIdx = worker.pvIdx; size_t multiPV = std::min(size_t(worker.options["MultiPV"]), rootMoves.size()); uint64_t tbHits = threads.tb_hits() + (worker.tbConfig.rootInTB ? rootMoves.size() : 0); for (size_t i = 0; i < multiPV; ++i) { bool updated = rootMoves[i].score != -VALUE_INFINITE; if (depth == 1 && !updated && i > 0) continue; Depth d = updated ? depth : std::max(1, depth - 1); Value v = updated ? rootMoves[i].uciScore : rootMoves[i].previousScore; if (v == -VALUE_INFINITE) v = VALUE_ZERO; bool tb = worker.tbConfig.rootInTB && std::abs(v) <= VALUE_TB; v = tb ? rootMoves[i].tbScore : v; bool isExact = i != pvIdx || tb || !updated; // tablebase- and previous-scores are exact // Potentially correct and extend the PV, and in exceptional cases v if (is_decisive(v) && std::abs(v) < VALUE_MATE_IN_MAX_PLY && ((!rootMoves[i].scoreLowerbound && !rootMoves[i].scoreUpperbound) || isExact)) syzygy_extend_pv(worker.options, worker.limits, pos, rootMoves[i], v); std::string pv; for (Move m : rootMoves[i].pv) pv += UCIEngine::move(m, pos.is_chess960()) + " "; // Remove last whitespace if (!pv.empty()) pv.pop_back(); auto wdl = worker.options["UCI_ShowWDL"] ? UCIEngine::wdl(v, pos) : ""; auto bound = rootMoves[i].scoreLowerbound ? "lowerbound" : (rootMoves[i].scoreUpperbound ? 
"upperbound" : ""); InfoFull info; info.depth = d; info.selDepth = rootMoves[i].selDepth; info.multiPV = i + 1; info.score = {v, pos}; info.wdl = wdl; if (!isExact) info.bound = bound; TimePoint time = std::max(TimePoint(1), tm.elapsed_time()); info.timeMs = time; info.nodes = nodes; info.nps = nodes * 1000 / time; info.tbHits = tbHits; info.pv = pv; info.hashfull = tt.hashfull(); updates.onUpdateFull(info); } } // Called in case we have no ponder move before exiting the search, // for instance, in case we stop the search during a fail high at root. // We try hard to have a ponder move to return to the GUI, // otherwise in case of 'ponder on' we have nothing to think about. bool RootMove::extract_ponder_from_tt(const TranspositionTable& tt, Position& pos) { StateInfo st; assert(pv.size() == 1); if (pv[0] == Move::none()) return false; pos.do_move(pv[0], st, &tt); auto [ttHit, ttData, ttWriter] = tt.probe(pos.key()); if (ttHit) { if (MoveList(pos).contains(ttData.move)) pv.push_back(ttData.move); } pos.undo_move(pv[0]); return pv.size() > 1; } } // namespace Stockfish ================================================ FILE: src/search.h ================================================ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. Stockfish is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . 
*/ #ifndef SEARCH_H_INCLUDED #define SEARCH_H_INCLUDED #include #include #include #include #include #include #include #include #include #include #include #include #include "history.h" #include "misc.h" #include "nnue/network.h" #include "nnue/nnue_accumulator.h" #include "numa.h" #include "position.h" #include "score.h" #include "syzygy/tbprobe.h" #include "timeman.h" #include "types.h" namespace Stockfish { // Different node types, used as a template parameter enum NodeType { NonPV, PV, Root }; class TranspositionTable; class ThreadPool; class OptionsMap; namespace Search { // Stack struct keeps track of the information we need to remember from nodes // shallower and deeper in the tree during the search. Each search thread has // its own array of Stack objects, indexed by the current ply. struct Stack { Move* pv; PieceToHistory* continuationHistory; CorrectionHistory* continuationCorrectionHistory; int ply; Move currentMove; Move excludedMove; Value staticEval; int statScore; int moveCount; bool inCheck; bool ttPv; bool ttHit; bool followPV; int cutoffCnt; int reduction; }; // RootMove struct is used for moves at the root of the tree. For each root move // we store a score and a PV (really a refutation in the case of moves which // fail low). Score is normally set at -VALUE_INFINITE for all non-pv moves. struct RootMove { explicit RootMove(Move m) : pv(1, m) {} bool extract_ponder_from_tt(const TranspositionTable& tt, Position& pos); bool operator==(const Move& m) const { return pv[0] == m; } // Sort in descending order bool operator<(const RootMove& m) const { return m.score != score ? 
m.score < score : m.previousScore < previousScore; } uint64_t effort = 0; Value score = -VALUE_INFINITE; Value previousScore = -VALUE_INFINITE; Value averageScore = -VALUE_INFINITE; Value meanSquaredScore = -VALUE_INFINITE * VALUE_INFINITE; Value uciScore = -VALUE_INFINITE; bool scoreLowerbound = false; bool scoreUpperbound = false; int selDepth = 0; int tbRank = 0; Value tbScore; std::vector pv; }; using RootMoves = std::vector; // LimitsType struct stores information sent by the caller about the analysis required. struct LimitsType { // Init explicitly due to broken value-initialization of non POD in MSVC LimitsType() { time[WHITE] = time[BLACK] = inc[WHITE] = inc[BLACK] = npmsec = movetime = TimePoint(0); movestogo = depth = mate = perft = infinite = 0; nodes = 0; ponderMode = false; } bool use_time_management() const { return time[WHITE] || time[BLACK]; } std::vector searchmoves; TimePoint time[COLOR_NB], inc[COLOR_NB], npmsec, movetime, startTime; int movestogo, depth, mate, perft, infinite; uint64_t nodes; bool ponderMode; }; // The UCI stores the uci options, thread pool, and transposition table. // This struct is used to easily forward data to the Search::Worker class. struct SharedState { SharedState(const OptionsMap& optionsMap, ThreadPool& threadPool, TranspositionTable& transpositionTable, std::map& sharedHists, const LazyNumaReplicatedSystemWide& nets) : options(optionsMap), threads(threadPool), tt(transpositionTable), sharedHistories(sharedHists), networks(nets) {} const OptionsMap& options; ThreadPool& threads; TranspositionTable& tt; std::map& sharedHistories; const LazyNumaReplicatedSystemWide& networks; }; class Worker; // Null Object Pattern, implement a common interface for the SearchManagers. // A Null Object will be given to non-mainthread workers. 
class ISearchManager {
   public:
    virtual ~ISearchManager() {}
    // Hook invoked with the calling worker; every concrete manager must provide it.
    virtual void check_time(Search::Worker&) = 0;
};

// Smallest search report: a depth together with a score.
struct InfoShort {
    int   depth;
    Score score;
};

// InfoShort extended with the remaining report fields. The std::string_view
// members are non-owning, so the text they refer to must stay alive for as
// long as the struct is read.
// NOTE(review): field meanings below are taken from their names only - confirm
// against the code that fills this struct.
struct InfoFull: InfoShort {
    int              selDepth;
    size_t           multiPV;
    std::string_view wdl;
    std::string_view bound;
    size_t           timeMs;
    size_t           nodes;
    size_t           nps;
    size_t           tbHits;
    std::string_view pv;
    int              hashfull;
};

// Per-iteration report: a depth, a move in text form and that move's number.
struct InfoIteration {
    int              depth;
    std::string_view currmove;
    size_t           currmovenumber;
};

// Skill structure is used to implement strength limit. If we have a UCI_Elo,
// we convert it to an appropriate skill level, anchored to the Stash engine.
// This method is based on a fit of the Elo results for games played between
// Stockfish at various skill levels and various versions of the Stash engine.
// Skill 0 .. 19 now covers CCRL Blitz Elo from 1320 to 3190, approximately
// Reference: https://github.com/vondele/Stockfish/commit/a08b8d4e9711c2
struct Skill {
    // Lowest and highest Elo ratings used in the skill level calculation
    constexpr static int LowestElo  = 1320;
    constexpr static int HighestElo = 3190;

    // A non-zero uci_elo takes precedence over skill_level. The Elo is mapped
    // linearly onto e (0 at LowestElo, 1 at HighestElo), fed through a cubic
    // polynomial, and the result is clamped to [0, 19]. e itself is not
    // clamped, so out-of-range Elo values are only bounded by that final clamp.
    // With uci_elo == 0 the level is skill_level converted to double, unclamped.
    Skill(int skill_level, int uci_elo) {
        if (uci_elo)
        {
            double e = double(uci_elo - LowestElo) / (HighestElo - LowestElo);
            level = std::clamp((((37.2473 * e - 40.8525) * e + 22.2943) * e - 0.311438), 0.0, 19.0);
        }
        else
            level = double(skill_level);
    }

    // True while the level is below 20; an Elo-derived level never exceeds 19,
    // so only a skill_level of 20 or more makes this return false.
    bool enabled() const { return level < 20.0; }

    // True only at the single depth equal to 1 + the truncated level.
    bool time_to_pick(Depth depth) const { return depth == 1 + int(level); }

    // Declared here, defined elsewhere.
    Move pick_best(const RootMoves&, size_t multiPV);

    double level;
    Move   best = Move::none();
};

// SearchManager manages the search from the main thread. It is responsible for
// keeping track of the time, and storing data strictly related to the main thread.
class SearchManager: public ISearchManager { public: using UpdateShort = std::function; using UpdateFull = std::function; using UpdateIter = std::function; using UpdateBestmove = std::function; struct UpdateContext { UpdateShort onUpdateNoMoves; UpdateFull onUpdateFull; UpdateIter onIter; UpdateBestmove onBestmove; }; SearchManager(const UpdateContext& updateContext) : updates(updateContext) {} void check_time(Search::Worker& worker) override; void pv(Search::Worker& worker, const ThreadPool& threads, const TranspositionTable& tt, Depth depth); Stockfish::TimeManagement tm; double originalTimeAdjust; int callsCnt; std::atomic_bool ponder; std::array iterValue; double previousTimeReduction; Value bestPreviousScore; Value bestPreviousAverageScore; bool stopOnPonderhit; size_t id; const UpdateContext& updates; }; class NullSearchManager: public ISearchManager { public: void check_time(Search::Worker&) override {} }; // Search::Worker is the class that does the actual search. // It is instantiated once per thread, and it is responsible for keeping track // of the search history, and storing data required for the search. class Worker { public: Worker(SharedState&, std::unique_ptr, size_t, size_t, size_t, NumaReplicatedAccessToken); // Called at instantiation to initialize reductions tables. // Reset histories, usually before a new game. void clear(); // Called when the program receives the UCI 'go' command. // It searches from the root position and outputs the "bestmove". 
void start_searching(); bool is_mainthread() const { return threadIdx == 0; } void ensure_network_replicated(); // Public because they need to be updatable by the stats ButterflyHistory mainHistory; LowPlyHistory lowPlyHistory; CapturePieceToHistory captureHistory; ContinuationHistory continuationHistory[2][2]; CorrectionHistory continuationCorrectionHistory; TTMoveHistory ttMoveHistory; SharedHistories& sharedHistory; private: void iterative_deepening(); void do_move(Position& pos, const Move move, StateInfo& st, Stack* const ss); void do_move(Position& pos, const Move move, StateInfo& st, const bool givesCheck, Stack* const ss); void do_null_move(Position& pos, StateInfo& st, Stack* const ss); void undo_move(Position& pos, const Move move); void undo_null_move(Position& pos); // This is the main search function, for both PV and non-PV nodes template Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, bool cutNode); // Quiescence search function, which is called by the main search template Value qsearch(Position& pos, Stack* ss, Value alpha, Value beta); Depth reduction(bool i, Depth d, int mn, int delta) const; // Pointer to the search manager, only allowed to be called by the main thread SearchManager* main_manager() const { assert(threadIdx == 0); return static_cast(manager.get()); } TimePoint elapsed() const; TimePoint elapsed_time() const; Value evaluate(const Position&); LimitsType limits; size_t pvIdx, pvLast; std::atomic nodes, tbHits, bestMoveChanges; int selDepth, nmpMinPly; Value optimism[COLOR_NB]; Position rootPos; StateInfo rootState; RootMoves rootMoves; Depth rootDepth, completedDepth; Value rootDelta; std::vector lastIterationPV; size_t threadIdx, numaThreadIdx, numaTotal; NumaReplicatedAccessToken numaAccessToken; // Reductions lookup table initialized at startup std::array reductions; // [depth or moveNumber] // The main thread has a SearchManager, the others have a NullSearchManager std::unique_ptr manager; 
Tablebases::Config tbConfig; const OptionsMap& options; ThreadPool& threads; TranspositionTable& tt; const LazyNumaReplicatedSystemWide& networks; // Used by NNUE Eval::NNUE::AccumulatorStack accumulatorStack; Eval::NNUE::AccumulatorCaches refreshTable; friend class Stockfish::ThreadPool; friend class SearchManager; }; struct ConthistBonus { int index; int weight; }; } // namespace Search } // namespace Stockfish #endif // #ifndef SEARCH_H_INCLUDED ================================================ FILE: src/shm.h ================================================ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. Stockfish is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . 
*/ #ifndef SHM_H_INCLUDED #define SHM_H_INCLUDED #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(__linux__) && !defined(__ANDROID__) #include "shm_linux.h" #endif #if defined(__ANDROID__) #include #define SF_MAX_SEM_NAME_LEN NAME_MAX #endif #include "types.h" #include "memory.h" #if defined(_WIN32) #if _WIN32_WINNT < 0x0601 #undef _WIN32_WINNT #define _WIN32_WINNT 0x0601 // Force to include needed API prototypes #endif #if !defined(NOMINMAX) #define NOMINMAX #endif #include #elif defined(__linux__) #include #include #include #include #include #include #include #endif #if defined(__APPLE__) #include #include #elif defined(__sun) #include #elif defined(__FreeBSD__) #include #include #include #elif defined(__NetBSD__) || defined(__DragonFly__) || defined(__linux__) #include #include #endif namespace Stockfish { // argv[0] CANNOT be used because we need to identify the executable. // argv[0] contains the command used to invoke it, which does not involve the full path. // Just using a path is not fully resilient either, as the executable could // have changed if it wasn't locked by the OS. Ideally we would hash the executable // but it's not really that important at this point. // If the path is longer than 4095 bytes the hash will be computed from an unspecified // amount of bytes of the path; in particular it can be a hash of an empty string.
inline std::string getExecutablePathHash() { char executable_path[4096] = {0}; std::size_t path_length = 0; #if defined(_WIN32) path_length = GetModuleFileNameA(NULL, executable_path, sizeof(executable_path)); #elif defined(__APPLE__) uint32_t size = sizeof(executable_path); if (_NSGetExecutablePath(executable_path, &size) == 0) { path_length = std::strlen(executable_path); } #elif defined(__sun) // Solaris const char* path = getexecname(); if (path) { std::strncpy(executable_path, path, sizeof(executable_path) - 1); path_length = std::strlen(executable_path); } #elif defined(__FreeBSD__) size_t size = sizeof(executable_path); int mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1}; if (sysctl(mib, 4, executable_path, &size, NULL, 0) == 0) { path_length = std::strlen(executable_path); } #elif defined(__NetBSD__) || defined(__DragonFly__) ssize_t len = readlink("/proc/curproc/exe", executable_path, sizeof(executable_path) - 1); if (len >= 0) { executable_path[len] = '\0'; path_length = len; } #elif defined(__linux__) ssize_t len = readlink("/proc/self/exe", executable_path, sizeof(executable_path) - 1); if (len >= 0) { executable_path[len] = '\0'; path_length = len; } #endif // In case of any error the path will be empty. return std::string(executable_path, path_length); } enum class SystemWideSharedConstantAllocationStatus { NoAllocation, LocalMemory, SharedMemory }; #if defined(_WIN32) inline std::string GetLastErrorAsString(DWORD error) { //Get the error message ID, if any. DWORD errorMessageID = error; if (errorMessageID == 0) { return std::string(); //No error message has been recorded } LPSTR messageBuffer = nullptr; //Ask Win32 to give us the string version of that message ID. //The parameters we pass in, tell Win32 to create the buffer that holds the message for us (because we don't yet know how long the message string will be). 
size_t size = FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, NULL, errorMessageID, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), (LPSTR) &messageBuffer, 0, NULL); //Copy the error message into a std::string. std::string message(messageBuffer, size); //Free the Win32's string's buffer. LocalFree(messageBuffer); return message; } // Utilizes shared memory to store the value. It is deduplicated system-wide (for the single user). template class SharedMemoryBackend { public: enum class Status { Success, LargePageAllocationError, FileMappingError, MapViewError, MutexCreateError, MutexWaitError, MutexReleaseError, NotInitialized }; static constexpr DWORD IS_INITIALIZED_VALUE = 1; SharedMemoryBackend() : status(Status::NotInitialized) {}; SharedMemoryBackend(const std::string& shm_name, const T& value) : status(Status::NotInitialized) { initialize(shm_name, value); } bool is_valid() const { return status == Status::Success; } std::optional get_error_message() const { switch (status) { case Status::Success : return std::nullopt; case Status::LargePageAllocationError : return "Failed to allocate large page memory"; case Status::FileMappingError : return "Failed to create file mapping: " + last_error_message; case Status::MapViewError : return "Failed to map view: " + last_error_message; case Status::MutexCreateError : return "Failed to create mutex: " + last_error_message; case Status::MutexWaitError : return "Failed to wait on mutex: " + last_error_message; case Status::MutexReleaseError : return "Failed to release mutex: " + last_error_message; case Status::NotInitialized : return "Not initialized"; default : return "Unknown error"; } } void* get() const { return is_valid() ? 
pMap : nullptr; } ~SharedMemoryBackend() { cleanup(); } SharedMemoryBackend(const SharedMemoryBackend&) = delete; SharedMemoryBackend& operator=(const SharedMemoryBackend&) = delete; SharedMemoryBackend(SharedMemoryBackend&& other) noexcept : pMap(other.pMap), hMapFile(other.hMapFile), status(other.status), last_error_message(std::move(other.last_error_message)) { other.pMap = nullptr; other.hMapFile = 0; other.status = Status::NotInitialized; } SharedMemoryBackend& operator=(SharedMemoryBackend&& other) noexcept { if (this != &other) { cleanup(); pMap = other.pMap; hMapFile = other.hMapFile; status = other.status; last_error_message = std::move(other.last_error_message); other.pMap = nullptr; other.hMapFile = 0; other.status = Status::NotInitialized; } return *this; } SystemWideSharedConstantAllocationStatus get_status() const { return status == Status::Success ? SystemWideSharedConstantAllocationStatus::SharedMemory : SystemWideSharedConstantAllocationStatus::NoAllocation; } private: void initialize(const std::string& shm_name, const T& value) { const size_t total_size = sizeof(T) + sizeof(IS_INITIALIZED_VALUE); // Try allocating with large pages first. hMapFile = windows_try_with_large_page_priviliges( [&](size_t largePageSize) { const size_t total_size_aligned = (total_size + largePageSize - 1) / largePageSize * largePageSize; #if defined(_WIN64) DWORD total_size_low = total_size_aligned & 0xFFFFFFFFu; DWORD total_size_high = total_size_aligned >> 32u; #else DWORD total_size_low = total_size_aligned; DWORD total_size_high = 0; #endif return CreateFileMappingA(INVALID_HANDLE_VALUE, NULL, PAGE_READWRITE | SEC_COMMIT | SEC_LARGE_PAGES, total_size_high, total_size_low, shm_name.c_str()); }, []() { return (void*) nullptr; }); // Fallback to normal allocation if no large pages available. 
if (!hMapFile) { hMapFile = CreateFileMappingA(INVALID_HANDLE_VALUE, NULL, PAGE_READWRITE, 0, static_cast(total_size), shm_name.c_str()); } if (!hMapFile) { const DWORD err = GetLastError(); last_error_message = GetLastErrorAsString(err); status = Status::FileMappingError; return; } pMap = MapViewOfFile(hMapFile, FILE_MAP_ALL_ACCESS, 0, 0, total_size); if (!pMap) { const DWORD err = GetLastError(); last_error_message = GetLastErrorAsString(err); status = Status::MapViewError; cleanup_partial(); return; } // Use named mutex to ensure only one initializer std::string mutex_name = shm_name + "$mutex"; HANDLE hMutex = CreateMutexA(NULL, FALSE, mutex_name.c_str()); if (!hMutex) { const DWORD err = GetLastError(); last_error_message = GetLastErrorAsString(err); status = Status::MutexCreateError; cleanup_partial(); return; } DWORD wait_result = WaitForSingleObject(hMutex, INFINITE); if (wait_result != WAIT_OBJECT_0) { const DWORD err = GetLastError(); last_error_message = GetLastErrorAsString(err); status = Status::MutexWaitError; CloseHandle(hMutex); cleanup_partial(); return; } // Crucially, we place the object first to ensure alignment. 
volatile DWORD* is_initialized = std::launder(reinterpret_cast(reinterpret_cast(pMap) + sizeof(T))); T* object = std::launder(reinterpret_cast(pMap)); if (*is_initialized != IS_INITIALIZED_VALUE) { // First time initialization, message for debug purposes new (object) T{value}; *is_initialized = IS_INITIALIZED_VALUE; } BOOL release_result = ReleaseMutex(hMutex); CloseHandle(hMutex); if (!release_result) { const DWORD err = GetLastError(); last_error_message = GetLastErrorAsString(err); status = Status::MutexReleaseError; cleanup_partial(); return; } status = Status::Success; } void cleanup_partial() { if (pMap != nullptr) { UnmapViewOfFile(pMap); pMap = nullptr; } if (hMapFile) { CloseHandle(hMapFile); hMapFile = 0; } } void cleanup() { if (pMap != nullptr) { UnmapViewOfFile(pMap); pMap = nullptr; } if (hMapFile) { CloseHandle(hMapFile); hMapFile = 0; } } void* pMap = nullptr; HANDLE hMapFile = 0; Status status = Status::NotInitialized; std::string last_error_message; }; #elif defined(__linux__) && !defined(__ANDROID__) template class SharedMemoryBackend { public: SharedMemoryBackend() = default; SharedMemoryBackend(const std::string& shm_name, const T& value) : shm1(shm::create_shared(shm_name, value)) {} void* get() const { const T* ptr = &shm1->get(); return reinterpret_cast(const_cast(ptr)); } bool is_valid() const { return shm1 && shm1->is_open() && shm1->is_initialized(); } SystemWideSharedConstantAllocationStatus get_status() const { return is_valid() ? SystemWideSharedConstantAllocationStatus::SharedMemory : SystemWideSharedConstantAllocationStatus::NoAllocation; } std::optional get_error_message() const { if (!shm1) return "Shared memory not initialized"; if (!shm1->is_open()) return "Shared memory is not open"; if (!shm1->is_initialized()) return "Not initialized"; return std::nullopt; } private: std::optional> shm1; }; #else // For systems that don't have shared memory, or support is troublesome. 
// The way fallback is done is that we need a dummy backend. template class SharedMemoryBackend { public: SharedMemoryBackend() = default; SharedMemoryBackend([[maybe_unused]] const std::string& shm_name, [[maybe_unused]] const T& value) {} void* get() const { return nullptr; } bool is_valid() const { return false; } SystemWideSharedConstantAllocationStatus get_status() const { return SystemWideSharedConstantAllocationStatus::NoAllocation; } std::optional get_error_message() const { return "Dummy SharedMemoryBackend"; } }; #endif template struct SharedMemoryBackendFallback { SharedMemoryBackendFallback() = default; SharedMemoryBackendFallback(const std::string&, const T& value) : fallback_object(make_unique_large_page(value)) {} void* get() const { return fallback_object.get(); } SharedMemoryBackendFallback(const SharedMemoryBackendFallback&) = delete; SharedMemoryBackendFallback& operator=(const SharedMemoryBackendFallback&) = delete; SharedMemoryBackendFallback(SharedMemoryBackendFallback&& other) noexcept : fallback_object(std::move(other.fallback_object)) {} SharedMemoryBackendFallback& operator=(SharedMemoryBackendFallback&& other) noexcept { fallback_object = std::move(other.fallback_object); return *this; } SystemWideSharedConstantAllocationStatus get_status() const { return fallback_object == nullptr ? SystemWideSharedConstantAllocationStatus::NoAllocation : SystemWideSharedConstantAllocationStatus::LocalMemory; } std::optional get_error_message() const { if (fallback_object == nullptr) return "Not initialized"; return "Shared memory not supported by the OS. 
Local allocation fallback."; } private: LargePagePtr fallback_object; }; // Platform-independent wrapper template struct SystemWideSharedConstant { private: static std::string createHashString(const std::string& input) { char buf[1024]; std::snprintf(buf, sizeof(buf), "%016" PRIx64, hash_string(input)); return buf; } public: // We can't run the destructor because it may be in a completely different process. // The object stored must also be obviously in-line but we can't check for that, other than some basic checks that cover most cases. static_assert(std::is_trivially_destructible_v); static_assert(std::is_trivially_move_constructible_v); static_assert(std::is_trivially_copy_constructible_v); SystemWideSharedConstant() = default; // Content is addressed by its hash. An additional discriminator can be added to account for differences // that are not present in the content, for example NUMA node allocation. SystemWideSharedConstant(const T& value, std::size_t discriminator = 0) { std::size_t content_hash = std::hash{}(value); std::size_t executable_hash = hash_string(getExecutablePathHash()); char buf[1024]; std::snprintf(buf, sizeof(buf), "Local\\sf_%zu$%zu$%zu", content_hash, executable_hash, discriminator); std::string shm_name = buf; #if defined(__linux__) && !defined(__ANDROID__) // POSIX shared memory names must start with a slash shm_name = "/sf_" + createHashString(shm_name); // hash name and make sure it is not longer than SF_MAX_SEM_NAME_LEN if (shm_name.size() > SF_MAX_SEM_NAME_LEN) { shm_name = shm_name.substr(0, SF_MAX_SEM_NAME_LEN - 1); } #endif SharedMemoryBackend shm_backend(shm_name, value); if (shm_backend.is_valid()) { backend = std::move(shm_backend); } else { backend = SharedMemoryBackendFallback(shm_name, value); } } SystemWideSharedConstant(const SystemWideSharedConstant&) = delete; SystemWideSharedConstant& operator=(const SystemWideSharedConstant&) = delete; SystemWideSharedConstant(SystemWideSharedConstant&& other) noexcept : 
backend(std::move(other.backend)) {} SystemWideSharedConstant& operator=(SystemWideSharedConstant&& other) noexcept { backend = std::move(other.backend); return *this; } const T& operator*() const { return *std::launder(reinterpret_cast(get_ptr())); } bool operator==(std::nullptr_t) const noexcept { return get_ptr() == nullptr; } bool operator!=(std::nullptr_t) const noexcept { return get_ptr() != nullptr; } SystemWideSharedConstantAllocationStatus get_status() const { return std::visit( [](const auto& end) -> SystemWideSharedConstantAllocationStatus { if constexpr (std::is_same_v, std::monostate>) { return SystemWideSharedConstantAllocationStatus::NoAllocation; } else { return end.get_status(); } }, backend); } std::optional get_error_message() const { return std::visit( [](const auto& end) -> std::optional { if constexpr (std::is_same_v, std::monostate>) { return std::nullopt; } else { return end.get_error_message(); } }, backend); } private: auto get_ptr() const { return std::visit( [](const auto& end) -> void* { if constexpr (std::is_same_v, std::monostate>) { return nullptr; } else { return end.get(); } }, backend); } std::variant, SharedMemoryBackendFallback> backend; }; } // namespace Stockfish #endif // #ifndef SHM_H_INCLUDED ================================================ FILE: src/shm_linux.h ================================================ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. Stockfish is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with this program. If not, see . */ #ifndef SHM_LINUX_H_INCLUDED #define SHM_LINUX_H_INCLUDED #if !defined(__linux__) || defined(__ANDROID__) #error shm_linux.h should not be included on this platform. #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define SF_MAX_SEM_NAME_LEN NAME_MAX #include "misc.h" namespace Stockfish::shm { namespace detail { struct ShmHeader { static constexpr uint32_t SHM_MAGIC = 0xAD5F1A12; pthread_mutex_t mutex; std::atomic ref_count{0}; std::atomic initialized{false}; uint32_t magic = SHM_MAGIC; }; class SharedMemoryBase { public: virtual ~SharedMemoryBase() = default; virtual void close(bool skip_unmap = false) noexcept = 0; virtual const std::string& name() const noexcept = 0; }; class SharedMemoryRegistry { private: static std::mutex registry_mutex_; static std::vector active_instances_; public: static void register_instance(SharedMemoryBase* instance) { std::scoped_lock lock(registry_mutex_); active_instances_.push_back(instance); } static void unregister_instance(SharedMemoryBase* instance) { std::scoped_lock lock(registry_mutex_); active_instances_.erase( std::remove(active_instances_.begin(), active_instances_.end(), instance), active_instances_.end()); } static void cleanup_all(bool skip_unmap = false) noexcept { std::scoped_lock lock(registry_mutex_); for (auto* instance : active_instances_) instance->close(skip_unmap); active_instances_.clear(); } }; inline std::mutex SharedMemoryRegistry::registry_mutex_; inline std::vector SharedMemoryRegistry::active_instances_; class CleanupHooks { private: static std::once_flag register_once_; static void handle_signal(int sig) noexcept { // Search threads may still be running, so skip munmap (but still perform // other cleanup actions). 
The memory mappings will be released on exit. SharedMemoryRegistry::cleanup_all(true); // Invoke the default handler, which will exit struct sigaction sa; sa.sa_handler = SIG_DFL; sigemptyset(&sa.sa_mask); sa.sa_flags = 0; if (sigaction(sig, &sa, nullptr) == -1) _Exit(128 + sig); raise(sig); } static void register_signal_handlers() noexcept { std::atexit([]() { SharedMemoryRegistry::cleanup_all(true); }); constexpr int signals[] = {SIGHUP, SIGINT, SIGQUIT, SIGILL, SIGABRT, SIGFPE, SIGSEGV, SIGTERM, SIGBUS, SIGSYS, SIGXCPU, SIGXFSZ}; struct sigaction sa; sa.sa_handler = handle_signal; sigemptyset(&sa.sa_mask); sa.sa_flags = 0; for (int sig : signals) sigaction(sig, &sa, nullptr); } public: static void ensure_registered() noexcept { std::call_once(register_once_, register_signal_handlers); } }; inline std::once_flag CleanupHooks::register_once_; inline int portable_fallocate(int fd, off_t offset, off_t length) { #ifdef __APPLE__ fstore_t store = {F_ALLOCATECONTIG, F_PEOFPOSMODE, offset, length, 0}; int ret = fcntl(fd, F_PREALLOCATE, &store); if (ret == -1) { store.fst_flags = F_ALLOCATEALL; ret = fcntl(fd, F_PREALLOCATE, &store); } if (ret != -1) ret = ftruncate(fd, offset + length); return ret; #else return posix_fallocate(fd, offset, length); #endif } } // namespace detail template class SharedMemory: public detail::SharedMemoryBase { static_assert(std::is_trivially_copyable_v, "T must be trivially copyable"); static_assert(!std::is_pointer_v, "T cannot be a pointer type"); private: std::string name_; int fd_ = -1; void* mapped_ptr_ = nullptr; T* data_ptr_ = nullptr; detail::ShmHeader* header_ptr_ = nullptr; size_t total_size_ = 0; std::string sentinel_base_; std::string sentinel_path_; static constexpr size_t calculate_total_size() noexcept { return sizeof(T) + sizeof(detail::ShmHeader); } static std::string make_sentinel_base(const std::string& name) { char buf[32]; // Using std::to_string here causes non-deterministic PGO builds. 
// snprintf, being part of libc, is insensitive to the formatted values. std::snprintf(buf, sizeof(buf), "sfshm_%016" PRIu64, hash_string(name)); return buf; } public: explicit SharedMemory(const std::string& name) noexcept : name_(name), total_size_(calculate_total_size()), sentinel_base_(make_sentinel_base(name)) {} ~SharedMemory() noexcept override { detail::SharedMemoryRegistry::unregister_instance(this); close(); } SharedMemory(const SharedMemory&) = delete; SharedMemory& operator=(const SharedMemory&) = delete; SharedMemory(SharedMemory&& other) noexcept : name_(std::move(other.name_)), fd_(other.fd_), mapped_ptr_(other.mapped_ptr_), data_ptr_(other.data_ptr_), header_ptr_(other.header_ptr_), total_size_(other.total_size_), sentinel_base_(std::move(other.sentinel_base_)), sentinel_path_(std::move(other.sentinel_path_)) { detail::SharedMemoryRegistry::unregister_instance(&other); detail::SharedMemoryRegistry::register_instance(this); other.reset(); } SharedMemory& operator=(SharedMemory&& other) noexcept { if (this != &other) { detail::SharedMemoryRegistry::unregister_instance(this); close(); name_ = std::move(other.name_); fd_ = other.fd_; mapped_ptr_ = other.mapped_ptr_; data_ptr_ = other.data_ptr_; header_ptr_ = other.header_ptr_; total_size_ = other.total_size_; sentinel_base_ = std::move(other.sentinel_base_); sentinel_path_ = std::move(other.sentinel_path_); detail::SharedMemoryRegistry::unregister_instance(&other); detail::SharedMemoryRegistry::register_instance(this); other.reset(); } return *this; } [[nodiscard]] bool open(const T& initial_value) noexcept { detail::CleanupHooks::ensure_registered(); bool retried_stale = false; while (true) { if (is_open()) return false; bool created_new = false; fd_ = shm_open(name_.c_str(), O_CREAT | O_EXCL | O_RDWR, 0666); if (fd_ == -1) { fd_ = shm_open(name_.c_str(), O_RDWR, 0666); if (fd_ == -1) return false; } else created_new = true; if (!lock_file(LOCK_EX)) { ::close(fd_); reset(); return false; } bool 
invalid_header = false; bool success = created_new ? setup_new_region(initial_value) : setup_existing_region(invalid_header); if (!success) { if (created_new || invalid_header) shm_unlink(name_.c_str()); if (mapped_ptr_) unmap_region(); unlock_file(); ::close(fd_); reset(); if (!created_new && invalid_header && !retried_stale) { retried_stale = true; continue; } return false; } if (!lock_shared_mutex()) { if (created_new) shm_unlink(name_.c_str()); if (mapped_ptr_) unmap_region(); unlock_file(); ::close(fd_); reset(); if (!created_new && !retried_stale) { retried_stale = true; continue; } return false; } if (!create_sentinel_file_locked()) { unlock_shared_mutex(); unmap_region(); if (created_new) shm_unlink(name_.c_str()); unlock_file(); ::close(fd_); reset(); return false; } header_ptr_->ref_count.fetch_add(1, std::memory_order_acq_rel); unlock_shared_mutex(); unlock_file(); detail::SharedMemoryRegistry::register_instance(this); return true; } } void close(bool skip_unmap = false) noexcept override { if (fd_ == -1 && mapped_ptr_ == nullptr) return; bool remove_region = false; bool file_locked = lock_file(LOCK_EX); bool mutex_locked = false; if (file_locked && header_ptr_ != nullptr) mutex_locked = lock_shared_mutex(); if (mutex_locked) { if (header_ptr_) { header_ptr_->ref_count.fetch_sub(1, std::memory_order_acq_rel); } remove_sentinel_file(); remove_region = !has_other_live_sentinels_locked(); unlock_shared_mutex(); } else { remove_sentinel_file(); decrement_refcount_relaxed(); } if (skip_unmap) mapped_ptr_ = nullptr; else unmap_region(); if (remove_region) shm_unlink(name_.c_str()); if (file_locked) unlock_file(); if (fd_ != -1) { ::close(fd_); fd_ = -1; } if (!skip_unmap) reset(); } const std::string& name() const noexcept override { return name_; } [[nodiscard]] bool is_open() const noexcept { return fd_ != -1 && mapped_ptr_ && data_ptr_; } [[nodiscard]] const T& get() const noexcept { return *data_ptr_; } [[nodiscard]] const T* operator->() const noexcept { 
return data_ptr_; } [[nodiscard]] const T& operator*() const noexcept { return *data_ptr_; } [[nodiscard]] uint32_t ref_count() const noexcept { return header_ptr_ ? header_ptr_->ref_count.load(std::memory_order_acquire) : 0; } [[nodiscard]] bool is_initialized() const noexcept { return header_ptr_ ? header_ptr_->initialized.load(std::memory_order_acquire) : false; } static void cleanup_all_instances() noexcept { detail::SharedMemoryRegistry::cleanup_all(); } private: void reset() noexcept { fd_ = -1; mapped_ptr_ = nullptr; data_ptr_ = nullptr; header_ptr_ = nullptr; sentinel_path_.clear(); } void unmap_region() noexcept { if (mapped_ptr_) { munmap(mapped_ptr_, total_size_); mapped_ptr_ = nullptr; data_ptr_ = nullptr; header_ptr_ = nullptr; } } [[nodiscard]] bool lock_file(int operation) noexcept { if (fd_ == -1) return false; while (flock(fd_, operation) == -1) { if (errno == EINTR) continue; return false; } return true; } void unlock_file() noexcept { if (fd_ == -1) return; while (flock(fd_, LOCK_UN) == -1) { if (errno == EINTR) continue; break; } } std::string sentinel_full_path(pid_t pid) const { char buf[1024]; // See above snprintf comment std::snprintf(buf, sizeof(buf), "/dev/shm/%s.%ld", sentinel_base_.c_str(), long(pid)); return buf; } void decrement_refcount_relaxed() noexcept { if (!header_ptr_) return; uint32_t expected = header_ptr_->ref_count.load(std::memory_order_relaxed); while (expected != 0 && !header_ptr_->ref_count.compare_exchange_weak( expected, expected - 1, std::memory_order_acq_rel, std::memory_order_relaxed)) {} } bool create_sentinel_file_locked() noexcept { if (!header_ptr_) return false; const pid_t self_pid = getpid(); sentinel_path_ = sentinel_full_path(self_pid); for (int attempt = 0; attempt < 2; ++attempt) { int fd = ::open(sentinel_path_.c_str(), O_CREAT | O_EXCL | O_WRONLY | O_CLOEXEC, 0600); if (fd != -1) { ::close(fd); return true; } if (errno == EEXIST) { ::unlink(sentinel_path_.c_str()); decrement_refcount_relaxed(); 
continue; } break; } sentinel_path_.clear(); return false; } void remove_sentinel_file() noexcept { if (!sentinel_path_.empty()) { ::unlink(sentinel_path_.c_str()); sentinel_path_.clear(); } } static bool pid_is_alive(pid_t pid) noexcept { if (pid <= 0) return false; if (kill(pid, 0) == 0) return true; return errno == EPERM; } [[nodiscard]] bool initialize_shared_mutex() noexcept { if (!header_ptr_) return false; pthread_mutexattr_t attr; if (pthread_mutexattr_init(&attr) != 0) return false; bool success = pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED) == 0; #if _POSIX_C_SOURCE >= 200809L if (success) success = pthread_mutexattr_setrobust(&attr, PTHREAD_MUTEX_ROBUST) == 0; #endif if (success) success = pthread_mutex_init(&header_ptr_->mutex, &attr) == 0; pthread_mutexattr_destroy(&attr); return success; } [[nodiscard]] bool lock_shared_mutex() noexcept { if (!header_ptr_) return false; while (true) { int rc = pthread_mutex_lock(&header_ptr_->mutex); if (rc == 0) return true; #if _POSIX_C_SOURCE >= 200809L if (rc == EOWNERDEAD) { if (pthread_mutex_consistent(&header_ptr_->mutex) == 0) return true; return false; } #endif if (rc == EINTR) continue; return false; } } void unlock_shared_mutex() noexcept { if (header_ptr_) pthread_mutex_unlock(&header_ptr_->mutex); } bool has_other_live_sentinels_locked() const noexcept { DIR* dir = opendir("/dev/shm"); if (!dir) return false; std::string prefix = sentinel_base_ + "."; bool found = false; while (dirent* entry = readdir(dir)) { std::string name = entry->d_name; if (name.rfind(prefix, 0) != 0) continue; auto pid_str = name.substr(prefix.size()); char* end = nullptr; long value = std::strtol(pid_str.c_str(), &end, 10); if (!end || *end != '\0') continue; pid_t pid = static_cast(value); if (pid_is_alive(pid)) { found = true; break; } std::string stale_path = std::string("/dev/shm/") + name; ::unlink(stale_path.c_str()); const_cast(this)->decrement_refcount_relaxed(); } closedir(dir); return found; } 
[[nodiscard]] bool setup_new_region(const T& initial_value) noexcept { if (ftruncate(fd_, static_cast(total_size_)) == -1) return false; if (detail::portable_fallocate(fd_, 0, static_cast(total_size_)) != 0) return false; mapped_ptr_ = mmap(nullptr, total_size_, PROT_READ | PROT_WRITE, MAP_SHARED, fd_, 0); if (mapped_ptr_ == MAP_FAILED) { mapped_ptr_ = nullptr; return false; } data_ptr_ = static_cast(mapped_ptr_); header_ptr_ = reinterpret_cast(static_cast(mapped_ptr_) + sizeof(T)); new (header_ptr_) detail::ShmHeader{}; new (data_ptr_) T{initial_value}; if (!initialize_shared_mutex()) return false; header_ptr_->ref_count.store(0, std::memory_order_release); header_ptr_->initialized.store(true, std::memory_order_release); return true; } [[nodiscard]] bool setup_existing_region(bool& invalid_header) noexcept { invalid_header = false; struct stat st; fstat(fd_, &st); if (static_cast(st.st_size) < total_size_) { invalid_header = true; return false; } mapped_ptr_ = mmap(nullptr, total_size_, PROT_READ | PROT_WRITE, MAP_SHARED, fd_, 0); if (mapped_ptr_ == MAP_FAILED) { mapped_ptr_ = nullptr; return false; } data_ptr_ = static_cast(mapped_ptr_); header_ptr_ = std::launder( reinterpret_cast(static_cast(mapped_ptr_) + sizeof(T))); if (!header_ptr_->initialized.load(std::memory_order_acquire) || header_ptr_->magic != detail::ShmHeader::SHM_MAGIC) { invalid_header = true; unmap_region(); return false; } return true; } }; template [[nodiscard]] std::optional> create_shared(const std::string& name, const T& initial_value) noexcept { SharedMemory shm(name); if (shm.open(initial_value)) return shm; return std::nullopt; } } // namespace Stockfish::shm #endif // #ifndef SHM_LINUX_H_INCLUDED ================================================ FILE: src/syzygy/tbprobe.cpp ================================================ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) Stockfish is free software: you 
can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. Stockfish is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #include "tbprobe.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "../bitboard.h" #include "../misc.h" #include "../movegen.h" #include "../position.h" #include "../search.h" #include "../types.h" #include "../ucioption.h" #ifndef _WIN32 #include #include #include #else #define WIN32_LEAN_AND_MEAN #ifndef NOMINMAX #define NOMINMAX // Disable macros min() and max() #endif #include #endif using namespace Stockfish::Tablebases; int Stockfish::Tablebases::MaxCardinality; namespace Stockfish { namespace { constexpr int TBPIECES = 7; // Max number of supported pieces constexpr int MAX_DTZ = 1 << 18; // Max DTZ supported times 2, large enough to deal with the syzygy TB limit. 
enum { BigEndian, LittleEndian }; enum TBType { WDL, DTZ }; // Used as template parameter // Each table has a set of flags: all of them refer to DTZ tables, the last one to WDL tables enum TBFlag { STM = 1, Mapped = 2, WinPlies = 4, LossPlies = 8, Wide = 16, SingleValue = 128 }; inline WDLScore operator-(WDLScore d) { return WDLScore(-int(d)); } inline Square operator^(Square s, int i) { return Square(int(s) ^ i); } constexpr std::string_view PieceToChar = " PNBRQK pnbrqk"; int MapPawns[SQUARE_NB]; int MapB1H1H7[SQUARE_NB]; int MapA1D1D4[SQUARE_NB]; int MapKK[10][SQUARE_NB]; // [MapA1D1D4][SQUARE_NB] int Binomial[6][SQUARE_NB]; // [k][n] k elements from a set of n elements int LeadPawnIdx[6][SQUARE_NB]; // [leadPawnsCnt][SQUARE_NB] int LeadPawnsSize[6][4]; // [leadPawnsCnt][FILE_A..FILE_D] // Comparison function to sort leading pawns in ascending MapPawns[] order bool pawns_comp(Square i, Square j) { return MapPawns[i] < MapPawns[j]; } int off_A1H8(Square sq) { return int(rank_of(sq)) - file_of(sq); } constexpr Value WDL_to_value[] = {-VALUE_MATE + MAX_PLY + 1, VALUE_DRAW - 2, VALUE_DRAW, VALUE_DRAW + 2, VALUE_MATE - MAX_PLY - 1}; template inline void swap_endian(T& x) { static_assert(std::is_unsigned_v, "Argument of swap_endian not unsigned"); uint8_t tmp, *c = (uint8_t*) &x; for (int i = 0; i < Half; ++i) tmp = c[i], c[i] = c[End - i], c[End - i] = tmp; } template<> inline void swap_endian(uint8_t&) {} template T number(void* addr) { T v; if (uintptr_t(addr) & (alignof(T) - 1)) // Unaligned pointer (very rare) std::memcpy(&v, addr, sizeof(T)); else v = *((T*) addr); if (LE != IsLittleEndian) swap_endian(v); return v; } // DTZ tables don't store valid scores for moves that reset the rule50 counter // like captures and pawn moves but we can easily recover the correct dtz of the // previous move if we know the position's WDL score. int dtz_before_zeroing(WDLScore wdl) { return wdl == WDLWin ? 1 : wdl == WDLCursedWin ? 101 : wdl == WDLBlessedLoss ? 
-101 : wdl == WDLLoss ? -1 : 0; } // Return the sign of a number (-1, 0, 1) template int sign_of(T val) { return (T(0) < val) - (val < T(0)); } // Numbers in little-endian used by sparseIndex[] to point into blockLength[] struct SparseEntry { char block[4]; // Number of block char offset[2]; // Offset within the block }; static_assert(sizeof(SparseEntry) == 6, "SparseEntry must be 6 bytes"); using Sym = uint16_t; // Huffman symbol struct LR { enum Side { Left, Right }; uint8_t lr[3]; // The first 12 bits is the left-hand symbol, the second 12 // bits is the right-hand symbol. If the symbol has length 1, // then the left-hand symbol is the stored value. template Sym get() { return S == Left ? ((lr[1] & 0xF) << 8) | lr[0] : S == Right ? (lr[2] << 4) | (lr[1] >> 4) : (assert(false), Sym(-1)); } }; static_assert(sizeof(LR) == 3, "LR tree entry must be 3 bytes"); // Tablebases data layout is structured as following: // // TBFile: memory maps/unmaps the physical .rtbw and .rtbz files // TBTable: one object for each file with corresponding indexing information // TBTables: has ownership of TBTable objects, keeping a list and a hash // class TBFile memory maps/unmaps the single .rtbw and .rtbz files. Files are // memory mapped for best performance. Files are mapped at first access: at init // time only existence of the file is checked. class TBFile: public std::ifstream { std::string fname; public: // Look for and open the file among the Paths directories where the .rtbw // and .rtbz files can be found. Multiple directories are separated by ";" // on Windows and by ":" on Unix-based operating systems. 
// // Example: // C:\tb\wdl345;C:\tb\wdl6;D:\tb\dtz345;D:\tb\dtz6 static std::string Paths; TBFile(const std::string& f) { #ifndef _WIN32 constexpr char SepChar = ':'; #else constexpr char SepChar = ';'; #endif std::stringstream ss(Paths); std::string path; while (std::getline(ss, path, SepChar)) { fname = path + "/" + f; std::ifstream::open(fname); if (is_open()) return; } } // Memory map the file and check it. uint8_t* map(void** baseAddress, uint64_t* mapping, TBType type) { if (is_open()) close(); // Need to re-open to get native file descriptor #ifndef _WIN32 struct stat statbuf; int fd = ::open(fname.c_str(), O_RDONLY); if (fd == -1) return *baseAddress = nullptr, nullptr; fstat(fd, &statbuf); if (statbuf.st_size % 64 != 16) { std::cerr << "Corrupt tablebase file " << fname << std::endl; exit(EXIT_FAILURE); } *mapping = statbuf.st_size; *baseAddress = mmap(nullptr, statbuf.st_size, PROT_READ, MAP_SHARED, fd, 0); #if defined(MADV_RANDOM) madvise(*baseAddress, statbuf.st_size, MADV_RANDOM); #endif ::close(fd); if (*baseAddress == MAP_FAILED) { std::cerr << "Could not mmap() " << fname << std::endl; exit(EXIT_FAILURE); } #else // Note FILE_FLAG_RANDOM_ACCESS is only a hint to Windows and as such may get ignored. 
HANDLE fd = CreateFileA(fname.c_str(), GENERIC_READ, FILE_SHARE_READ, nullptr, OPEN_EXISTING, FILE_FLAG_RANDOM_ACCESS, nullptr); if (fd == INVALID_HANDLE_VALUE) return *baseAddress = nullptr, nullptr; DWORD size_high; DWORD size_low = GetFileSize(fd, &size_high); if (size_low % 64 != 16) { std::cerr << "Corrupt tablebase file " << fname << std::endl; exit(EXIT_FAILURE); } HANDLE mmap = CreateFileMapping(fd, nullptr, PAGE_READONLY, size_high, size_low, nullptr); CloseHandle(fd); if (!mmap) { std::cerr << "CreateFileMapping() failed" << std::endl; exit(EXIT_FAILURE); } *mapping = uint64_t(mmap); *baseAddress = MapViewOfFile(mmap, FILE_MAP_READ, 0, 0, 0); if (!*baseAddress) { std::cerr << "MapViewOfFile() failed, name = " << fname << ", error = " << GetLastError() << std::endl; exit(EXIT_FAILURE); } #endif uint8_t* data = (uint8_t*) *baseAddress; constexpr uint8_t Magics[][4] = {{0xD7, 0x66, 0x0C, 0xA5}, {0x71, 0xE8, 0x23, 0x5D}}; if (memcmp(data, Magics[type == WDL], 4)) { std::cerr << "Corrupted table in file " << fname << std::endl; unmap(*baseAddress, *mapping); return *baseAddress = nullptr, nullptr; } return data + 4; // Skip Magics's header } static void unmap(void* baseAddress, uint64_t mapping) { #ifndef _WIN32 munmap(baseAddress, mapping); #else UnmapViewOfFile(baseAddress); CloseHandle((HANDLE) mapping); #endif } }; std::string TBFile::Paths; // struct PairsData contains low-level indexing information to access TB data. // There are 8, 4, or 2 PairsData records for each TBTable, according to the type // of table and if positions have pawns or not. It is populated at first access. 
struct PairsData {
    uint8_t      flags;            // Table flags, see enum TBFlag
    uint8_t      maxSymLen;        // Maximum length in bits of the Huffman symbols
    uint8_t      minSymLen;        // Minimum length in bits of the Huffman symbols
    uint32_t     blocksNum;        // Number of blocks in the TB file
    size_t       sizeofBlock;      // Block size in bytes
    size_t       span;             // About every span values there is a SparseIndex[] entry
    Sym*         lowestSym;        // lowestSym[l] is the symbol of length l with the lowest value
    LR*          btree;            // btree[sym] stores the left and right symbols that expand sym
    uint16_t*    blockLength;      // Number of stored positions (minus one) for each block: 1..65536
    uint32_t     blockLengthSize;  // Size of blockLength[] table: padded so it's bigger than blocksNum
    SparseEntry* sparseIndex;      // Partial indices into blockLength[]
    size_t       sparseIndexSize;  // Size of SparseIndex[] table
    uint8_t*     data;             // Start of Huffman compressed data
    std::vector base64;  // base64[l - min_sym_len] is the 64bit-padded lowest symbol of length l
    std::vector symlen;  // Number of values (-1) represented by a given Huffman symbol: 1..256
    Piece    pieces[TBPIECES];        // Position pieces: the order of pieces defines the groups
    uint64_t groupIdx[TBPIECES + 1];  // Start index used for the encoding of the group's pieces
    int      groupLen[TBPIECES + 1];  // Number of pieces in a given group: KRKN -> (3, 1)
    uint16_t map_idx[4];  // WDLWin, WDLLoss, WDLCursedWin, WDLBlessedLoss (used in DTZ)
};

// struct TBTable contains indexing information to access the corresponding TBFile.
// There are 2 types of TBTable, corresponding to a WDL or a DTZ file. TBTable
// is populated at init time but the nested PairsData records are populated at
// first access, when the corresponding file is memory mapped.
template struct TBTable {
    using Ret = std::conditional_t;

    static constexpr int Sides = Type == WDL ? 2 : 1;

    std::atomic_bool ready;
    void*            baseAddress;
    uint8_t*         map;
    uint64_t         mapping;
    Key              key;
    Key              key2;
    int              pieceCount;
    bool             hasPawns;
    bool             hasUniquePieces;
    uint8_t          pawnCount[2];     // [Lead color / other color]
    PairsData        items[Sides][4];  // [wtm / btm][FILE_A..FILE_D or 0]

    // Selects the PairsData record for a side to move and file. The side is
    // reduced modulo Sides and the file is ignored for pawnless tables.
    PairsData* get(int stm, int f) { return &items[stm % Sides][hasPawns ? f : 0]; }

    // Only 'ready' and 'baseAddress' are initialized here
    TBTable() :
        ready(false),
        baseAddress(nullptr) {}
    explicit TBTable(const std::string& code);
    explicit TBTable(const TBTable& wdl);

    // Releases the file mapping, if one was established
    ~TBTable() {
        if (baseAddress)
            TBFile::unmap(baseAddress, mapping);
    }
};

// Builds the table descriptor from a material code: the position is set up
// twice from 'code', once with WHITE and once with BLACK, to obtain 'key' and
// 'key2' respectively.
template<> TBTable::TBTable(const std::string& code) :
    TBTable() {

    StateInfo st;
    Position  pos;

    auto err = pos.set(code, WHITE, &st);
    // IMPORTANT: We cannot assert here because it WILL produce validation errors
    // on some TB7 and higher positions due to the black king being attacked
    // while white is to move. This is not fixable without significant changes.
    // As using pos.set here is already a very hacky way to achieve the desired
    // result here so we leave it for now. The validation checks that fail are
    // done after the position is fully set up, so it's fine for now.
    // assert(!err.has_value());
    (void) err;

    key        = pos.material_key();
    pieceCount = pos.count();
    hasPawns   = pos.pieces(PAWN);

    // True as soon as one side has exactly one piece of some non-king type
    hasUniquePieces = false;
    for (Color c : {WHITE, BLACK})
        for (PieceType pt = PAWN; pt < KING; ++pt)
            if (popcount(pos.pieces(c, pt)) == 1)
                hasUniquePieces = true;

    // Set the leading color. In case both sides have pawns the leading color
    // is the side with fewer pawns because this leads to better compression.
    bool c = !pos.count(BLACK) || (pos.count(WHITE) && pos.count(BLACK) >= pos.count(WHITE));

    pawnCount[0] = pos.count(c ? WHITE : BLACK);
    pawnCount[1] = pos.count(c ? BLACK : WHITE);

    err = pos.set(code, BLACK, &st);
    // IMPORTANT: We cannot assert here because it WILL produce validation errors
    // on some TB7 and higher positions due to the black king being attacked
    // while white is to move. This is not fixable without significant changes.
    // As using pos.set here is already a very hacky way to achieve the desired
    // result here so we leave it for now. The validation checks that fail are
    // done after the position is fully set up, so it's fine for now.
    // assert(!err.has_value());
    (void) err;

    key2 = pos.material_key();
}

template<> TBTable::TBTable(const TBTable& wdl) :
    TBTable() {

    // Use the corresponding WDL table to avoid recalculating all from scratch
    key             = wdl.key;
    key2            = wdl.key2;
    pieceCount      = wdl.pieceCount;
    hasPawns        = wdl.hasPawns;
    hasUniquePieces = wdl.hasUniquePieces;
    pawnCount[0]    = wdl.pawnCount[0];
    pawnCount[1]    = wdl.pawnCount[1];
}

// class TBTables creates and keeps ownership of the TBTable objects, one for
// each TB file found. It supports a fast, hash-based, table lookup. Populated
// at init time, accessed at probe time.
class TBTables {

    struct Entry {
        Key      key;
        TBTable* wdl;
        TBTable* dtz;

        template TBTable* get() const {
            return (TBTable*) (Type == WDL ? (void*) wdl : (void*) dtz);
        }
    };

    static constexpr int Size = 1 << 12;  // 4K table, indexed by key's 12 lsb
    static constexpr int Overflow = 1;  // Number of elements allowed to map to the last bucket

    Entry hashTable[Size + Overflow];

    std::deque> wdlTable;
    std::deque> dtzTable;

    size_t foundDTZFiles = 0;
    size_t foundWDLFiles = 0;

    // Inserts (or overwrites) the entry for 'key' using linear probing from the
    // key's home bucket. Terminates the process when no slot is found before the
    // reserved last element.
    void insert(Key key, TBTable* wdl, TBTable* dtz) {
        uint32_t homeBucket = uint32_t(key) & (Size - 1);
        Entry    entry{key, wdl, dtz};

        // Ensure last element is empty to avoid overflow when looking up
        for (uint32_t bucket = homeBucket; bucket < Size + Overflow - 1; ++bucket)
        {
            Key otherKey = hashTable[bucket].key;
            if (otherKey == key || !hashTable[bucket].get())
            {
                hashTable[bucket] = entry;
                return;
            }

            // Robin Hood hashing: If we've probed for longer than this element,
            // insert here and search for a new spot for the other element instead.
            uint32_t otherHomeBucket = uint32_t(otherKey) & (Size - 1);
            if (otherHomeBucket > homeBucket)
            {
                std::swap(entry, hashTable[bucket]);
                key        = otherKey;
                homeBucket = otherHomeBucket;
            }
        }
        std::cerr << "TB hash table size too low!" << std::endl;
        exit(EXIT_FAILURE);
    }

   public:
    // Looks up the table for 'key', scanning forward from its home bucket. The
    // scan stops at a matching key or at the first empty slot, in which case the
    // returned pointer is null.
    template TBTable* get(Key key) {
        for (const Entry* entry = &hashTable[uint32_t(key) & (Size - 1)];; ++entry)
        {
            if (entry->key == key || !entry->get())
                return entry->get();
        }
    }

    // Empties the hash table, destroys all owned tables and resets the counters
    void clear() {
        memset(hashTable, 0, sizeof(hashTable));
        wdlTable.clear();
        dtzTable.clear();
        foundDTZFiles = 0;
        foundWDLFiles = 0;
    }

    void info() const {
        sync_cout << "info string Found " << foundWDLFiles << " WDL and " << foundDTZFiles
                  << " DTZ tablebase files (up to " << MaxCardinality << "-man)." << sync_endl;
    }

    void add(const std::vector& pieces);
};

TBTables TBTables;

// If the corresponding file exists two new objects TBTable and TBTable
// are created and added to the lists and hash table. Called at init time.
void TBTables::add(const std::vector& pieces) {

    std::string code;

    for (PieceType pt : pieces)
        code += PieceToChar[pt];
    // Put the 'v' separator in front of the second 'K' (search starts at index 1)
    code.insert(code.find('K', 1), "v");

    // The DTZ file is only counted here; a missing DTZ file does not prevent
    // the tables from being registered below.
    TBFile file_dtz(code + ".rtbz");  // KRK -> KRvK
    if (file_dtz.is_open())
    {
        file_dtz.close();
        foundDTZFiles++;
    }

    TBFile file(code + ".rtbw");  // KRK -> KRvK

    if (!file.is_open())  // Only WDL file is checked
        return;

    file.close();

    foundWDLFiles++;

    MaxCardinality = std::max(int(pieces.size()), MaxCardinality);

    wdlTable.emplace_back(code);
    dtzTable.emplace_back(wdlTable.back());

    // Insert into the hash keys for both colors: KRvK with KR white and black
    insert(wdlTable.back().key, &wdlTable.back(), &dtzTable.back());
    insert(wdlTable.back().key2, &wdlTable.back(), &dtzTable.back());
}

// TB tables are compressed with canonical Huffman code. The compressed data is divided into
// blocks of size d->sizeofBlock, and each block stores a variable number of symbols.
// Each symbol represents either a WDL or a (remapped) DTZ value, or a pair of other symbols
// (recursively).
// If you keep expanding the symbols in a block, you end up with up to 65536
// WDL or DTZ values. Each symbol represents up to 256 values and will correspond after
// Huffman coding to at least 1 bit. So a block of 32 bytes corresponds to at most
// 32 x 8 x 256 = 65536 values. This maximum is only reached for tables that consist mostly
// of draws or mostly of wins, but such tables are actually quite common. In principle, the
// blocks in WDL tables are 64 bytes long (and will be aligned on cache lines). But for
// mostly-draw or mostly-win tables this can leave many 64-byte blocks only half-filled, so
// in such cases blocks are 32 bytes long. The blocks of DTZ tables are up to 1024 bytes long.
// The generator picks the size that leads to the smallest table. The "book" of symbols and
// Huffman codes are the same for all blocks in the table. A non-symmetric pawnless TB file
// will have one table for wtm and one for btm, a TB file with pawns will have tables per
// file a,b,c,d also, in this case, one set for wtm and one for btm.
//
// Returns the raw value stored for index idx in the table described by d. The
// callers pass the result through map_score() to obtain a WDL or DTZ score.
int decompress_pairs(PairsData* d, uint64_t idx) {

    // Special case where all table positions store the same value
    // (in that case minSymLen is reused to hold the value itself)
    if (d->flags & TBFlag::SingleValue)
        return d->minSymLen;

    // First we need to locate the right block that stores the value at index "idx".
    // Because each block n stores blockLength[n] + 1 values, the index i of the block
    // that contains the value at position idx is:
    //
    //                    for (i = -1, sum = 0; sum <= idx; i++)
    //                        sum += blockLength[i + 1] + 1;
    //
    // This can be slow, so we use SparseIndex[] populated with a set of SparseEntry that
    // point to known indices into blockLength[]. Namely SparseIndex[k] is a SparseEntry
    // that stores the blockLength[] index and the offset within that block of the value
    // with index I(k), where:
    //
    //       I(k) = k * d->span + d->span / 2      (1)

    // First step is to get the 'k' of the I(k) nearest to our idx, using definition (1)
    uint32_t k = uint32_t(idx / d->span);

    // Then we read the corresponding SparseIndex[] entry
    uint32_t block  = number(&d->sparseIndex[k].block);
    int      offset = number(&d->sparseIndex[k].offset);

    // Now compute the difference idx - I(k). From the definition of k, we know that
    //
    //       idx = k * d->span + idx % d->span    (2)
    //
    // So from (1) and (2) we can compute idx - I(K):
    int diff = int(idx % d->span - d->span / 2);

    // Sum the above to offset to find the offset corresponding to our idx
    offset += diff;

    // Move to the previous/next block, until we reach the correct block that contains idx,
    // that is when 0 <= offset <= d->blockLength[block]
    while (offset < 0)
        offset += d->blockLength[--block] + 1;

    while (offset > d->blockLength[block])
        offset -= d->blockLength[block++] + 1;

    // Finally, we find the start address of our block of canonical Huffman symbols
    uint32_t* ptr = (uint32_t*) (d->data + (uint64_t(block) * d->sizeofBlock));

    // Read the first 64 bits in our block, this is a (truncated) sequence of
    // unknown number of symbols of unknown length but we know the first one
    // is at the beginning of this 64-bit sequence.
    uint64_t buf64 = number(ptr);
    ptr += 2;  // ptr is a uint32_t*, so this skips the 64 bits just read
    int buf64Size = 64;
    Sym sym;

    while (true)
    {
        int len = 0;  // This is the symbol length - d->min_sym_len

        // Now get the symbol length. For any symbol s64 of length l right-padded
        // to 64 bits we know that d->base64[l-1] >= s64 >= d->base64[l] so we
        // can find the symbol length iterating through base64[].
        while (buf64 < d->base64[len])
            ++len;

        // All the symbols of a given length are consecutive integers (numerical
        // sequence property), so we can compute the offset of our symbol of
        // length len, stored at the beginning of buf64.
        sym = Sym((buf64 - d->base64[len]) >> (64 - len - d->minSymLen));

        // Now add the value of the lowest symbol of length len to get our symbol
        sym += number(&d->lowestSym[len]);

        // If our offset is within the number of values represented by symbol sym,
        // we are done.
        if (offset < d->symlen[sym] + 1)
            break;

        // ...otherwise update the offset and continue to iterate
        offset -= d->symlen[sym] + 1;
        len += d->minSymLen;  // Get the real length
        buf64 <<= len;        // Consume the just processed symbol
        buf64Size -= len;

        if (buf64Size <= 32)
        {  // Refill the buffer
            // The next 32-bit word is placed right below the bits still unread
            buf64Size += 32;
            buf64 |= uint64_t(number(ptr++)) << (64 - buf64Size);
        }
    }

    // Now we have our symbol that expands into d->symlen[sym] + 1 symbols.
    // We binary-search for our value recursively expanding into the left and
    // right child symbols until we reach a leaf node where symlen[sym] + 1 == 1
    // that will store the value we need.
    while (d->symlen[sym])
    {
        Sym left = d->btree[sym].get();

        // If a symbol contains 36 sub-symbols (d->symlen[sym] + 1 = 36) and
        // expands in a pair (d->symlen[left] = 23, d->symlen[right] = 11), then
        // we know that, for instance, the tenth value (offset = 10) will be on
        // the left side because in Recursive Pairing child symbols are adjacent.
        if (offset < d->symlen[left] + 1)
            sym = left;
        else
        {
            offset -= d->symlen[left] + 1;
            sym = d->btree[sym].get();
        }
    }

    return d->btree[sym].get();
}

// This overload accepts every side to move
bool check_dtz_stm(TBTable*, int, File) { return true; }

// Accepts stm when it matches the table's STM flag, or when the table is
// pawnless and both material keys coincide.
bool check_dtz_stm(TBTable* entry, int stm, File f) {

    auto flags = entry->get(stm, f)->flags;
    return (flags & TBFlag::STM) == stm || ((entry->key == entry->key2) && !entry->hasPawns);
}

// DTZ scores are sorted by frequency of occurrence and then assigned the
// values 0, 1, 2, ... in order of decreasing frequency. This is done for each
// of the four WDLScore values. The mapping information necessary to reconstruct
// the original values are stored in the TB file and read during map[] init.
// This overload shifts the stored value 0..4 down by 2 to obtain the WDLScore
WDLScore map_score(TBTable*, File, int value, WDLScore) { return WDLScore(value - 2); }

// This overload undoes the frequency remapping (when the table is flagged as
// Mapped), converts moves to plies where needed and returns the result plus one.
int map_score(TBTable* entry, File f, int value, WDLScore wdl) {

    // Selects the map_idx[] slot for a given wdl + 2
    constexpr int WDLMap[] = {1, 3, 0, 2, 0};

    auto flags = entry->get(0, f)->flags;

    uint8_t*  map = entry->map;
    uint16_t* idx = entry->get(0, f)->map_idx;
    if (flags & TBFlag::Mapped)
    {
        // Wide tables store the map as 16-bit entries, the others as bytes
        if (flags & TBFlag::Wide)
            value = ((uint16_t*) map)[idx[WDLMap[wdl + 2]] + value];
        else
            value = map[idx[WDLMap[wdl + 2]] + value];
    }

    // DTZ tables store distance to zero in number of moves or plies. We
    // want to return plies, so we have to convert to plies when needed.
    if ((wdl == WDLWin && !(flags & TBFlag::WinPlies))
        || (wdl == WDLLoss && !(flags & TBFlag::LossPlies)) || wdl == WDLCursedWin
        || wdl == WDLBlessedLoss)
        value *= 2;

    return value + 1;
}

// A temporary fix for the compiler bug with vectorization. (#4450)
#if defined(__clang__) && defined(__clang_major__) && __clang_major__ >= 15
    #define DISABLE_CLANG_LOOP_VEC _Pragma("clang loop vectorize(disable)")
#else
    #define DISABLE_CLANG_LOOP_VEC
#endif

// Compute a unique index out of a position and use it to probe the TB file. To
// encode k pieces of the same type and color, first sort the pieces by square in
// ascending order s1 <= s2 <= ... <= sk then compute the unique index as:
//
//      idx = Binomial[1][s1] + Binomial[2][s2] + ... + Binomial[k][sk]
//
// Returns the score produced by map_score() for the decompressed value. When
// check_dtz_stm() rejects the side to move, *result is set to CHANGE_STM and a
// default-constructed Ret is returned instead.
template Ret do_probe_table(const Position& pos, T* entry, WDLScore wdl, ProbeState* result) {

    Square     squares[TBPIECES];
    Piece      pieces[TBPIECES];
    uint64_t   idx;
    int        next = 0, size = 0, leadPawnsCnt = 0;
    PairsData* d;
    Bitboard   b, leadPawns = 0;
    File       tbFile = FILE_A;

    // A given TB entry like KRK has associated two material keys: KRvk and Kvkr.
    // If both sides have the same pieces keys are equal. In this case TB tables
    // only stores the 'white to move' case, so if the position to lookup has black
    // to move, we need to switch the color and flip the squares before to lookup.
    bool symmetricBlackToMove = (entry->key == entry->key2 && pos.side_to_move());

    // TB files are calculated for white as the stronger side. For instance, we
    // have KRvK, not KvKR. A position where the stronger side is white will have
    // its material key == entry->key, otherwise we have to switch the color and
    // flip the squares before to lookup.
    bool blackStronger = (pos.material_key() != entry->key);

    // XOR masks applied below: 8 to piece codes, 56 to squares
    int flipColor   = (symmetricBlackToMove || blackStronger) * 8;
    int flipSquares = (symmetricBlackToMove || blackStronger) * 56;
    int stm         = (symmetricBlackToMove || blackStronger) ^ pos.side_to_move();

    // For pawns, TB files store 4 separate tables according if leading pawn is on
    // file a, b, c or d after reordering. The leading pawn is the one with maximum
    // MapPawns[] value, that is the one most toward the edges and with lowest rank.
    if (entry->hasPawns)
    {

        // In all the 4 tables, pawns are at the beginning of the piece sequence and
        // their color is the reference one. So we just pick the first one.
        Piece pc = Piece(entry->get(0, 0)->pieces[0] ^ flipColor);

        assert(type_of(pc) == PAWN);

        leadPawns = b = pos.pieces(color_of(pc), PAWN);
        do
            squares[size++] = pop_lsb(b) ^ flipSquares;
        while (b);

        leadPawnsCnt = size;

        // Move the pawn with the largest MapPawns[] value to the front
        std::swap(squares[0], *std::max_element(squares, squares + leadPawnsCnt, pawns_comp));

        tbFile = File(edge_distance(file_of(squares[0])));
    }

    // DTZ tables are one-sided, i.e. they store positions only for white to
    // move or only for black to move, so check for side to move to be stm,
    // early exit otherwise.
    if (!check_dtz_stm(entry, stm, tbFile))
        return *result = CHANGE_STM, Ret();

    // Now we are ready to get all the position pieces (but the lead pawns) and
    // directly map them to the correct color and square.
    b = pos.pieces() ^ leadPawns;
    do
    {
        Square s       = pop_lsb(b);
        squares[size]  = s ^ flipSquares;
        pieces[size++] = Piece(pos.piece_on(s) ^ flipColor);
    } while (b);

    assert(size >= 2);

    d = entry->get(stm, tbFile);

    // Then we reorder the pieces to have the same sequence as the one stored
    // in pieces[i]: the sequence that ensures the best compression.
    for (int i = leadPawnsCnt; i < size - 1; ++i)
        for (int j = i + 1; j < size; ++j)
            if (d->pieces[i] == pieces[j])
            {
                std::swap(pieces[i], pieces[j]);
                std::swap(squares[i], squares[j]);
                break;
            }

    // Now we map again the squares so that the square of the lead piece is in
    // the triangle A1-D1-D4.
    if (file_of(squares[0]) > FILE_D)
    {
        DISABLE_CLANG_LOOP_VEC
        for (int i = 0; i < size; ++i)
            squares[i] = flip_file(squares[i]);
    }

    // Encode leading pawns starting with the one with minimum MapPawns[] and
    // proceeding in ascending order.
    if (entry->hasPawns)
    {
        idx = LeadPawnIdx[leadPawnsCnt][squares[0]];

        std::stable_sort(squares + 1, squares + leadPawnsCnt, pawns_comp);

        for (int i = 1; i < leadPawnsCnt; ++i)
            idx += Binomial[i][MapPawns[squares[i]]];

        goto encode_remaining;  // With pawns we have finished special treatments
    }

    // In positions without pawns, we further flip the squares to ensure leading
    // piece is below RANK_5.
    if (rank_of(squares[0]) > RANK_4)
    {
        DISABLE_CLANG_LOOP_VEC
        for (int i = 0; i < size; ++i)
            squares[i] = flip_rank(squares[i]);
    }

    // Look for the first piece of the leading group not on the A1-D4 diagonal
    // and ensure it is mapped below the diagonal.
    DISABLE_CLANG_LOOP_VEC
    for (int i = 0; i < d->groupLen[0]; ++i)
    {
        if (!off_A1H8(squares[i]))
            continue;

        if (off_A1H8(squares[i]) > 0)  // A1-H8 diagonal flip: SQ_A3 -> SQ_C1
        {
            // Swapping the two 3-bit halves of a square exchanges rank and file
            DISABLE_CLANG_LOOP_VEC
            for (int j = i; j < size; ++j)
                squares[j] = Square(((squares[j] >> 3) | (squares[j] << 3)) & 63);
        }
        break;
    }

    // Encode the leading group.
    //
    // Suppose we have KRvK. Let's say the pieces are on square numbers wK, wR
    // and bK (each 0...63). The simplest way to map this position to an index
    // is like this:
    //
    //   index = wK * 64 * 64 + wR * 64 + bK;
    //
    // But this way the TB is going to have 64*64*64 = 262144 positions, with
    // lots of positions being equivalent (because they are mirrors of each
    // other) and lots of positions being invalid (two pieces on one square,
    // adjacent kings, etc.).
    // Usually the first step is to take the wK and bK together. There are just
    // 462 ways legal and not-mirrored ways to place the wK and bK on the board.
    // Once we have placed the wK and bK, there are 62 squares left for the wR
    // Mapping its square from 0..63 to available squares 0..61 can be done like:
    //
    //   wR -= (wR > wK) + (wR > bK);
    //
    // In words: if wR "comes later" than wK, we deduct 1, and the same if wR
    // "comes later" than bK. In case of two same pieces like KRRvK we want to
    // place the two Rs "together". If we have 62 squares left, we can place two
    // Rs "together" in 62 * 61 / 2 ways (we divide by 2 because rooks can be
    // swapped and still get the same position.)
    //
    // In case we have at least 3 unique pieces (including kings) we encode them
    // together.
    if (entry->hasUniquePieces)
    {

        int adjust1 = squares[1] > squares[0];
        int adjust2 = (squares[2] > squares[0]) + (squares[2] > squares[1]);

        // First piece is below a1-h8 diagonal. MapA1D1D4[] maps the b1-d1-d3
        // triangle to 0...5. There are 63 squares for second piece and 62
        // (mapped to 0...61) for the third.
        if (off_A1H8(squares[0]))
            idx = (MapA1D1D4[squares[0]] * 63 + (squares[1] - adjust1)) * 62 + squares[2] - adjust2;

        // First piece is on a1-h8 diagonal, second below: map this occurrence to
        // 6 to differentiate from the above case, rank_of() maps a1-d4 diagonal
        // to 0...3 and finally MapB1H1H7[] maps the b1-h1-h7 triangle to 0..27.
        else if (off_A1H8(squares[1]))
            idx = (6 * 63 + rank_of(squares[0]) * 28 + MapB1H1H7[squares[1]]) * 62 + squares[2]
                - adjust2;

        // First two pieces are on a1-h8 diagonal, third below
        else if (off_A1H8(squares[2]))
            idx = 6 * 63 * 62 + 4 * 28 * 62 + rank_of(squares[0]) * 7 * 28
                + (rank_of(squares[1]) - adjust1) * 28 + MapB1H1H7[squares[2]];

        // All 3 pieces on the diagonal a1-h8
        else
            idx = 6 * 63 * 62 + 4 * 28 * 62 + 4 * 7 * 28 + rank_of(squares[0]) * 7 * 6
                + (rank_of(squares[1]) - adjust1) * 6 + (rank_of(squares[2]) - adjust2);
    }
    else
        // We don't have at least 3 unique pieces, like in KRRvKBB, just map
        // the kings.
        idx = MapKK[MapA1D1D4[squares[0]]][squares[1]];

encode_remaining:
    idx *= d->groupIdx[0];
    Square* groupSq = squares + d->groupLen[0];

    // Encode remaining pawns and then pieces according to square, in ascending order
    bool remainingPawns = entry->hasPawns && entry->pawnCount[1];

    // groupLen[] is zero-terminated, which ends this loop
    while (d->groupLen[++next])
    {
        std::stable_sort(groupSq, groupSq + d->groupLen[next]);
        uint64_t n = 0;

        // Map down a square if "comes later" than a square in the previous
        // groups (similar to what was done earlier for leading group pieces).
        for (int i = 0; i < d->groupLen[next]; ++i)
        {
            auto f      = [&](Square s) { return groupSq[i] > s; };
            auto adjust = std::count_if(squares, groupSq, f);
            // The extra "- 8" shift is applied to the remaining-pawns group only
            n += Binomial[i + 1][groupSq[i] - adjust - 8 * remainingPawns];
        }

        remainingPawns = false;
        idx += n * d->groupIdx[next];
        groupSq += d->groupLen[next];
    }

    // Now that we have the index, decompress the pair and get the score
    return map_score(entry, tbFile, decompress_pairs(d, idx), wdl);
}

// Group together pieces that will be encoded together. The general rule is that
// a group contains pieces of the same type and color. The exception is the leading
// group that, in case of positions without pawns, can be formed by 3 different
// pieces (default) or by the king pair when there is not a unique piece apart
// from the kings. When there are pawns, pawns are always first in pieces[].
// // As example KRKN -> KRK + N, KNNK -> KK + NN, KPPKP -> P + PP + K + K // // The actual grouping depends on the TB generator and can be inferred from the // sequence of pieces in piece[] array. template void set_groups(T& e, PairsData* d, int order[], File f) { int n = 0, firstLen = e.hasPawns ? 0 : e.hasUniquePieces ? 3 : 2; d->groupLen[n] = 1; // Number of pieces per group is stored in groupLen[], for instance in KRKN // the encoder will default on '111', so groupLen[] will be (3, 1). for (int i = 1; i < e.pieceCount; ++i) if (--firstLen > 0 || d->pieces[i] == d->pieces[i - 1]) d->groupLen[n]++; else d->groupLen[++n] = 1; d->groupLen[++n] = 0; // Zero-terminated // The sequence in pieces[] defines the groups, but not the order in which // they are encoded. If the pieces in a group g can be combined on the board // in N(g) different ways, then the position encoding will be of the form: // // g1 * N(g2) * N(g3) + g2 * N(g3) + g3 // // This ensures unique encoding for the whole position. The order of the // groups is a per-table parameter and could not follow the canonical leading // pawns/pieces -> remaining pawns -> remaining pieces. In particular the // first group is at order[0] position and the remaining pawns, when present, // are at order[1] position. bool pp = e.hasPawns && e.pawnCount[1]; // Pawns on both sides int next = pp ? 2 : 1; int freeSquares = 64 - d->groupLen[0] - (pp ? d->groupLen[1] : 0); uint64_t idx = 1; for (int k = 0; next < n || k == order[0] || k == order[1]; ++k) if (k == order[0]) // Leading pawns or pieces { d->groupIdx[0] = idx; idx *= e.hasPawns ? LeadPawnsSize[d->groupLen[0]][f] : e.hasUniquePieces ? 
31332 : 462; } else if (k == order[1]) // Remaining pawns { d->groupIdx[1] = idx; idx *= Binomial[d->groupLen[1]][48 - d->groupLen[0]]; } else // Remaining pieces { d->groupIdx[next] = idx; idx *= Binomial[d->groupLen[next]][freeSquares]; freeSquares -= d->groupLen[next++]; } d->groupIdx[n] = idx; } // In Recursive Pairing each symbol represents a pair of children symbols. So // read d->btree[] symbols data and expand each one in his left and right child // symbol until reaching the leaves that represent the symbol value. uint8_t set_symlen(PairsData* d, Sym s, std::vector& visited) { visited[s] = true; // We can set it now because tree is acyclic Sym sr = d->btree[s].get(); if (sr == 0xFFF) return 0; Sym sl = d->btree[s].get(); if (!visited[sl]) d->symlen[sl] = set_symlen(d, sl, visited); if (!visited[sr]) d->symlen[sr] = set_symlen(d, sr, visited); return d->symlen[sl] + d->symlen[sr] + 1; } uint8_t* set_sizes(PairsData* d, uint8_t* data) { d->flags = *data++; if (d->flags & TBFlag::SingleValue) { d->blocksNum = d->blockLengthSize = 0; d->span = d->sparseIndexSize = 0; // Broken MSVC zero-init d->minSymLen = *data++; // Here we store the single value return data; } // groupLen[] is a zero-terminated list of group lengths, the last groupIdx[] // element stores the biggest index that is the tb size. uint64_t tbSize = d->groupIdx[std::find(d->groupLen, d->groupLen + 7, 0) - d->groupLen]; d->sizeofBlock = 1ULL << *data++; d->span = 1ULL << *data++; d->sparseIndexSize = size_t((tbSize + d->span - 1) / d->span); // Round up auto padding = number(data++); d->blocksNum = number(data); data += sizeof(uint32_t); d->blockLengthSize = d->blocksNum + padding; // Padded to ensure SparseIndex[] // does not point out of range. 
d->maxSymLen = *data++; d->minSymLen = *data++; d->lowestSym = (Sym*) data; d->base64.resize(d->maxSymLen - d->minSymLen + 1); // See https://en.wikipedia.org/wiki/Huffman_coding // The canonical code is ordered such that longer symbols (in terms of // the number of bits of their Huffman code) have a lower numeric value, // so that d->lowestSym[i] >= d->lowestSym[i+1] (when read as LittleEndian). // Starting from this we compute a base64[] table indexed by symbol length // and containing 64 bit values so that d->base64[i] >= d->base64[i+1]. // Implementation note: we first cast the unsigned size_t "base64.size()" // to a signed int "base64_size" variable and then we are able to subtract 2, // avoiding unsigned overflow warnings. int base64_size = static_cast(d->base64.size()); for (int i = base64_size - 2; i >= 0; --i) { d->base64[i] = (d->base64[i + 1] + number(&d->lowestSym[i]) - number(&d->lowestSym[i + 1])) / 2; assert(d->base64[i] * 2 >= d->base64[i + 1]); } // Now left-shift by an amount so that d->base64[i] gets shifted 1 bit more // than d->base64[i+1] and given the above assert condition, we ensure that // d->base64[i] >= d->base64[i+1]. Moreover for any symbol s64 of length i // and right-padded to 64 bits holds d->base64[i-1] >= s64 >= d->base64[i]. for (int i = 0; i < base64_size; ++i) d->base64[i] <<= 64 - i - d->minSymLen; // Right-padding to 64 bits data += base64_size * sizeof(Sym); d->symlen.resize(number(data)); data += sizeof(uint16_t); d->btree = (LR*) data; // The compression scheme used is "Recursive Pairing", that replaces the most // frequent adjacent pair of symbols in the source message by a new symbol, // reevaluating the frequencies of all of the symbol pairs with respect to // the extended alphabet, and then repeating the process. 
// See https://web.archive.org/web/20201106232444/http://www.larsson.dogma.net/dcc99.pdf std::vector visited(d->symlen.size()); for (Sym sym = 0; sym < d->symlen.size(); ++sym) if (!visited[sym]) d->symlen[sym] = set_symlen(d, sym, visited); return data + d->symlen.size() * sizeof(LR) + (d->symlen.size() & 1); } uint8_t* set_dtz_map(TBTable&, uint8_t* data, File) { return data; } uint8_t* set_dtz_map(TBTable& e, uint8_t* data, File maxFile) { e.map = data; for (File f = FILE_A; f <= maxFile; ++f) { auto flags = e.get(0, f)->flags; if (flags & TBFlag::Mapped) { if (flags & TBFlag::Wide) { data += uintptr_t(data) & 1; // Word alignment, we may have a mixed table for (int i = 0; i < 4; ++i) { // Sequence like 3,x,x,x,1,x,0,2,x,x e.get(0, f)->map_idx[i] = uint16_t((uint16_t*) data - (uint16_t*) e.map + 1); data += 2 * number(data) + 2; } } else { for (int i = 0; i < 4; ++i) { e.get(0, f)->map_idx[i] = uint16_t(data - e.map + 1); data += *data + 1; } } } } return data += uintptr_t(data) & 1; // Word alignment } // Populate entry's PairsData records with data from the just memory-mapped file. // Called at first access. template void set(T& e, uint8_t* data) { PairsData* d; enum { Split = 1, HasPawns = 2 }; assert(e.hasPawns == bool(*data & HasPawns)); assert((e.key != e.key2) == bool(*data & Split)); data++; // First byte stores flags const int sides = T::Sides == 2 && (e.key != e.key2) ? 2 : 1; const File maxFile = e.hasPawns ? FILE_D : FILE_A; bool pp = e.hasPawns && e.pawnCount[1]; // Pawns on both sides assert(!pp || e.pawnCount[0]); for (File f = FILE_A; f <= maxFile; ++f) { for (int i = 0; i < sides; i++) *e.get(i, f) = PairsData(); int order[][2] = {{*data & 0xF, pp ? *(data + 1) & 0xF : 0xF}, {*data >> 4, pp ? *(data + 1) >> 4 : 0xF}}; data += 1 + pp; for (int k = 0; k < e.pieceCount; ++k, ++data) for (int i = 0; i < sides; i++) e.get(i, f)->pieces[k] = Piece(i ? 
*data >> 4 : *data & 0xF); for (int i = 0; i < sides; ++i) set_groups(e, e.get(i, f), order[i], f); } data += uintptr_t(data) & 1; // Word alignment for (File f = FILE_A; f <= maxFile; ++f) for (int i = 0; i < sides; i++) data = set_sizes(e.get(i, f), data); data = set_dtz_map(e, data, maxFile); for (File f = FILE_A; f <= maxFile; ++f) for (int i = 0; i < sides; i++) { (d = e.get(i, f))->sparseIndex = (SparseEntry*) data; data += d->sparseIndexSize * sizeof(SparseEntry); } for (File f = FILE_A; f <= maxFile; ++f) for (int i = 0; i < sides; i++) { (d = e.get(i, f))->blockLength = (uint16_t*) data; data += d->blockLengthSize * sizeof(uint16_t); } for (File f = FILE_A; f <= maxFile; ++f) for (int i = 0; i < sides; i++) { data = (uint8_t*) ((uintptr_t(data) + 0x3F) & ~0x3F); // 64 byte alignment (d = e.get(i, f))->data = data; data += d->blocksNum * d->sizeofBlock; } } // If the TB file corresponding to the given position is already memory-mapped // then return its base address, otherwise, try to memory map and init it. Called // at every probe, memory map, and init only at first access. Function is thread // safe and can be called concurrently. template void* mapped(TBTable& e, const Position& pos) { static std::mutex mutex; // Because TB is the only usage of materialKey, check it here in debug mode assert(pos.material_key_is_ok()); // Use 'acquire' to avoid a thread reading 'ready' == true while // another is still working. (compiler reordering may cause this). 
if (e.ready.load(std::memory_order_acquire)) return e.baseAddress; // Could be nullptr if file does not exist std::scoped_lock lk(mutex); if (e.ready.load(std::memory_order_relaxed)) // Recheck under lock return e.baseAddress; // Pieces strings in decreasing order for each color, like ("KPP","KR") std::string fname, w, b; for (PieceType pt = KING; pt >= PAWN; --pt) { w += std::string(popcount(pos.pieces(WHITE, pt)), PieceToChar[pt]); b += std::string(popcount(pos.pieces(BLACK, pt)), PieceToChar[pt]); } fname = (e.key == pos.material_key() ? w + 'v' + b : b + 'v' + w) + (Type == WDL ? ".rtbw" : ".rtbz"); uint8_t* data = TBFile(fname).map(&e.baseAddress, &e.mapping, Type); if (data) set(e, data); e.ready.store(true, std::memory_order_release); return e.baseAddress; } template::Ret> Ret probe_table(const Position& pos, ProbeState* result, WDLScore wdl = WDLDraw) { if (pos.count() == 2) // KvK return Ret(WDLDraw); TBTable* entry = TBTables.get(pos.material_key()); if (!entry || !mapped(*entry, pos)) return *result = FAIL, Ret(); return do_probe_table(pos, entry, wdl, result); } // For a position where the side to move has a winning capture it is not necessary // to store a winning value so the generator treats such positions as "don't care" // and tries to assign to it a value that improves the compression ratio. Similarly, // if the side to move has a drawing capture, then the position is at least drawn. // If the position is won, then the TB needs to store a win value. But if the // position is drawn, the TB may store a loss value if that is better for compression. // All of this means that during probing, the engine must look at captures and probe // their results and must probe the position itself. The "best" result of these // probes is the correct result for the position. // DTZ tables do not store values when a following move is a zeroing winning move // (winning capture or winning pawn move). 
Also, DTZ store wrong values for positions // where the best move is an ep-move (even if losing). So in all these cases set // the state to ZEROING_BEST_MOVE. template WDLScore search(Position& pos, ProbeState* result) { WDLScore value, bestValue = WDLLoss; StateInfo st; auto moveList = MoveList(pos); size_t totalCount = moveList.size(), moveCount = 0; for (const Move move : moveList) { if (!pos.capture(move) && (!CheckZeroingMoves || type_of(pos.moved_piece(move)) != PAWN)) continue; moveCount++; pos.do_move(move, st); value = -search(pos, result); pos.undo_move(move); if (*result == FAIL) return WDLDraw; if (value > bestValue) { bestValue = value; if (value >= WDLWin) { *result = ZEROING_BEST_MOVE; // Winning DTZ-zeroing move return value; } } } // In case we have already searched all the legal moves we don't have to probe // the TB because the stored score could be wrong. For instance TB tables // do not contain information on position with ep rights, so in this case // the result of probe_wdl_table is wrong. Also in case of only capture // moves, for instance here 4K3/4q3/6p1/2k5/6p1/8/8/8 w - - 0 7, we have to // return with ZEROING_BEST_MOVE set. bool noMoreMoves = (moveCount && moveCount == totalCount); if (noMoreMoves) value = bestValue; else { value = probe_table(pos, result); if (*result == FAIL) return WDLDraw; } // DTZ stores a "don't care" value if bestValue is a win if (bestValue >= value) return *result = (bestValue > WDLDraw || noMoreMoves ? ZEROING_BEST_MOVE : OK), bestValue; return *result = OK, value; } } // namespace // Called at startup and after every change to // "SyzygyPath" UCI option to (re)create the various tables. It is not thread // safe, nor it needs to be. 
void Tablebases::init(const std::string& paths) { TBTables.clear(); MaxCardinality = 0; TBFile::Paths = paths; if (paths.empty()) return; // MapB1H1H7[] encodes a square below a1-h8 diagonal to 0..27 int code = 0; for (Square s = SQ_A1; s <= SQ_H8; ++s) if (off_A1H8(s) < 0) MapB1H1H7[s] = code++; // MapA1D1D4[] encodes a square in the a1-d1-d4 triangle to 0..9 std::vector diagonal; code = 0; for (Square s = SQ_A1; s <= SQ_D4; ++s) if (off_A1H8(s) < 0 && file_of(s) <= FILE_D) MapA1D1D4[s] = code++; else if (!off_A1H8(s) && file_of(s) <= FILE_D) diagonal.push_back(s); // Diagonal squares are encoded as last ones for (auto s : diagonal) MapA1D1D4[s] = code++; // MapKK[] encodes all the 462 possible legal positions of two kings where // the first is in the a1-d1-d4 triangle. If the first king is on the a1-d4 // diagonal, the other one shall not be above the a1-h8 diagonal. std::vector> bothOnDiagonal; code = 0; for (int idx = 0; idx < 10; idx++) for (Square s1 = SQ_A1; s1 <= SQ_D4; ++s1) if (MapA1D1D4[s1] == idx && (idx || s1 == SQ_B1)) // SQ_B1 is mapped to 0 { for (Square s2 = SQ_A1; s2 <= SQ_H8; ++s2) if ((PseudoAttacks[KING][s1] | s1) & s2) continue; // Illegal position else if (!off_A1H8(s1) && off_A1H8(s2) > 0) continue; // First on diagonal, second above else if (!off_A1H8(s1) && !off_A1H8(s2)) bothOnDiagonal.emplace_back(idx, s2); else MapKK[idx][s2] = code++; } // Legal positions with both kings on a diagonal are encoded as last ones for (auto p : bothOnDiagonal) MapKK[p.first][p.second] = code++; // Binomial[] stores the Binomial Coefficients using Pascal rule. There // are Binomial[k][n] ways to choose k elements from a set of n elements. Binomial[0][0] = 1; for (int n = 1; n < 64; n++) // Squares for (int k = 0; k < 6 && k <= n; ++k) // Pieces Binomial[k][n] = (k > 0 ? Binomial[k - 1][n - 1] : 0) + (k < n ? Binomial[k][n - 1] : 0); // MapPawns[s] encodes squares a2-h7 to 0..47. 
This is the number of possible // available squares when the leading one is in 's'. Moreover the pawn with // highest MapPawns[] is the leading pawn, the one nearest the edge, and // among pawns with the same file, the one with the lowest rank. int availableSquares = 47; // Available squares when lead pawn is in a2 // Init the tables for the encoding of leading pawns group: with 7-men TB we // can have up to 5 leading pawns (KPPPPPK). for (int leadPawnsCnt = 1; leadPawnsCnt <= 5; ++leadPawnsCnt) for (File f = FILE_A; f <= FILE_D; ++f) { // Restart the index at every file because TB table is split // by file, so we can reuse the same index for different files. int idx = 0; // Sum all possible combinations for a given file, starting with // the leading pawn on rank 2 and increasing the rank. for (Rank r = RANK_2; r <= RANK_7; ++r) { Square sq = make_square(f, r); // Compute MapPawns[] at first pass. // If sq is the leading pawn square, any other pawn cannot be // below or more toward the edge of sq. 
There are 47 available // squares when sq = a2 and reduced by 2 for any rank increase // due to mirroring: sq == a3 -> no a2, h2, so MapPawns[a3] = 45 if (leadPawnsCnt == 1) { MapPawns[sq] = availableSquares--; MapPawns[flip_file(sq)] = availableSquares--; } LeadPawnIdx[leadPawnsCnt][sq] = idx; idx += Binomial[leadPawnsCnt - 1][MapPawns[sq]]; } // After a file is traversed, store the cumulated per-file index LeadPawnsSize[leadPawnsCnt][f] = idx; } // Add entries in TB tables if the corresponding ".rtbw" file exists for (PieceType p1 = PAWN; p1 < KING; ++p1) { TBTables.add({KING, p1, KING}); for (PieceType p2 = PAWN; p2 <= p1; ++p2) { TBTables.add({KING, p1, p2, KING}); TBTables.add({KING, p1, KING, p2}); for (PieceType p3 = PAWN; p3 < KING; ++p3) TBTables.add({KING, p1, p2, KING, p3}); for (PieceType p3 = PAWN; p3 <= p2; ++p3) { TBTables.add({KING, p1, p2, p3, KING}); for (PieceType p4 = PAWN; p4 <= p3; ++p4) { TBTables.add({KING, p1, p2, p3, p4, KING}); for (PieceType p5 = PAWN; p5 <= p4; ++p5) TBTables.add({KING, p1, p2, p3, p4, p5, KING}); for (PieceType p5 = PAWN; p5 < KING; ++p5) TBTables.add({KING, p1, p2, p3, p4, KING, p5}); } for (PieceType p4 = PAWN; p4 < KING; ++p4) { TBTables.add({KING, p1, p2, p3, KING, p4}); for (PieceType p5 = PAWN; p5 <= p4; ++p5) TBTables.add({KING, p1, p2, p3, KING, p4, p5}); } } for (PieceType p3 = PAWN; p3 <= p1; ++p3) for (PieceType p4 = PAWN; p4 <= (p1 == p3 ? p2 : p3); ++p4) TBTables.add({KING, p1, p2, KING, p3, p4}); } } TBTables.info(); } // Probe the WDL table for a particular position. // If *result != FAIL, the probe was successful. // The return value is from the point of view of the side to move: // -2 : loss // -1 : loss, but draw under 50-move rule // 0 : draw // 1 : win, but draw under 50-move rule // 2 : win WDLScore Tablebases::probe_wdl(Position& pos, ProbeState* result) { *result = OK; return search(pos, result); } // Probe the DTZ table for a particular position. // If *result != FAIL, the probe was successful. 
// The return value is from the point of view of the side to move: // n < -100 : loss, but draw under 50-move rule // -100 <= n < -1 : loss in n ply (assuming 50-move counter == 0) // -1 : loss, the side to move is mated // 0 : draw // 1 < n <= 100 : win in n ply (assuming 50-move counter == 0) // 100 < n : win, but draw under 50-move rule // // The return value n can be off by 1: a return value -n can mean a loss // in n+1 ply and a return value +n can mean a win in n+1 ply. This // cannot happen for tables with positions exactly on the "edge" of // the 50-move rule. // // This implies that if dtz > 0 is returned, the position is certainly // a win if dtz + 50-move-counter <= 99. Care must be taken that the engine // picks moves that preserve dtz + 50-move-counter <= 99. // // If n = 100 immediately after a capture or pawn move, then the position // is also certainly a win, and during the whole phase until the next // capture or pawn move, the inequality to be preserved is // dtz + 50-move-counter <= 100. // // In short, if a move is available resulting in dtz + 50-move-counter <= 99, // then do not accept moves leading to dtz + 50-move-counter == 100. int Tablebases::probe_dtz(Position& pos, ProbeState* result) { *result = OK; WDLScore wdl = search(pos, result); if (*result == FAIL || wdl == WDLDraw) // DTZ tables don't store draws return 0; // DTZ stores a 'don't care value in this case, or even a plain wrong // one as in case the best move is a losing ep, so it cannot be probed. if (*result == ZEROING_BEST_MOVE) return dtz_before_zeroing(wdl); int dtz = probe_table(pos, result, wdl); if (*result == FAIL) return 0; if (*result != CHANGE_STM) return (dtz + 100 * (wdl == WDLBlessedLoss || wdl == WDLCursedWin)) * sign_of(wdl); // DTZ stores results for the other side, so we need to do a 1-ply search and // find the winning move that minimizes DTZ. 
StateInfo st; int minDTZ = 0xFFFF; for (const Move move : MoveList(pos)) { bool zeroing = pos.capture(move) || type_of(pos.moved_piece(move)) == PAWN; pos.do_move(move, st); // For zeroing moves we want the dtz of the move _before_ doing it, // otherwise we will get the dtz of the next move sequence. Search the // position after the move to get the score sign (because even in a // winning position we could make a losing capture or go for a draw). dtz = zeroing ? -dtz_before_zeroing(search(pos, result)) : -probe_dtz(pos, result); // If the move mates, force minDTZ to 1 if (dtz == 1 && pos.checkers() && MoveList(pos).size() == 0) minDTZ = 1; // Convert result from 1-ply search. Zeroing moves are already accounted // by dtz_before_zeroing() that returns the DTZ of the previous move. if (!zeroing) dtz += sign_of(dtz); // Skip the draws and if we are winning only pick positive dtz if (dtz < minDTZ && sign_of(dtz) == sign_of(wdl)) minDTZ = dtz; pos.undo_move(move); if (*result == FAIL) return 0; } // When there are no legal moves, the position is mate: we return -1 return minDTZ == 0xFFFF ? -1 : minDTZ; } // Use the DTZ tables to rank root moves. // // A return value false indicates that not all probes were successful. bool Tablebases::root_probe(Position& pos, Search::RootMoves& rootMoves, bool rule50, bool rankDTZ, const std::function& time_abort) { ProbeState result = OK; StateInfo st; // Obtain 50-move counter for the root position int cnt50 = pos.rule50_count(); // Check whether a position was repeated since the last zeroing move. bool rep = pos.has_repeated(); int dtz, bound = rule50 ? 
(MAX_DTZ / 2 - 100) : 1; // Probe and rank each move for (auto& m : rootMoves) { pos.do_move(m.pv[0], st); // Calculate dtz for the current move counting from the root position if (pos.rule50_count() == 0) { // In case of a zeroing move, dtz is one of -101/-1/0/1/101 WDLScore wdl = -probe_wdl(pos, &result); dtz = dtz_before_zeroing(wdl); } else if ((rule50 && pos.is_draw(1)) || pos.is_repetition(1)) { // In case a root move leads to a draw by repetition or 50-move rule, // we set dtz to zero. Note: since we are only 1 ply from the root, // this must be a true 3-fold repetition inside the game history. dtz = 0; } else { // Otherwise, take dtz for the new position and correct by 1 ply dtz = -probe_dtz(pos, &result); dtz = dtz > 0 ? dtz + 1 : dtz < 0 ? dtz - 1 : dtz; } // Make sure that a mating move is assigned a dtz value of 1 if (pos.checkers() && dtz == 2 && MoveList(pos).size() == 0) dtz = 1; pos.undo_move(m.pv[0]); if (time_abort() || result == FAIL) return false; // Better moves are ranked higher. Certain wins are ranked equally. // Losing moves are ranked equally unless a 50-move draw is in sight. int r = dtz > 0 ? (dtz + cnt50 <= 99 && !rep ? MAX_DTZ - (rankDTZ ? dtz : 0) : MAX_DTZ / 2 - (dtz + cnt50)) : dtz < 0 ? (-dtz * 2 + cnt50 < 100 ? -MAX_DTZ - (rankDTZ ? dtz : 0) : -MAX_DTZ / 2 + (-dtz + cnt50)) : 0; m.tbRank = r; // Determine the score to be displayed for this move. Assign at least // 1 cp to cursed wins and let it grow to 49 cp as the positions gets // closer to a real win. m.tbScore = r >= bound ? VALUE_MATE - MAX_PLY - 1 : r > 0 ? Value((std::max(3, r - (MAX_DTZ / 2 - 200)) * int(PawnValue)) / 200) : r == 0 ? VALUE_DRAW : r > -bound ? Value((std::min(-3, r + (MAX_DTZ / 2 - 200)) * int(PawnValue)) / 200) : -VALUE_MATE + MAX_PLY + 1; } return true; } // Use the WDL tables to rank root moves. // This is a fallback for the case that some or all DTZ tables are missing. // // A return value false indicates that not all probes were successful. 
bool Tablebases::root_probe_wdl(Position& pos, Search::RootMoves& rootMoves, bool rule50) { static const int WDL_to_rank[] = {-MAX_DTZ, -MAX_DTZ + 101, 0, MAX_DTZ - 101, MAX_DTZ}; ProbeState result = OK; StateInfo st; WDLScore wdl; // Probe and rank each move for (auto& m : rootMoves) { pos.do_move(m.pv[0], st); if (pos.is_draw(1)) wdl = WDLDraw; else wdl = -probe_wdl(pos, &result); pos.undo_move(m.pv[0]); if (result == FAIL) return false; m.tbRank = WDL_to_rank[wdl + 2]; if (!rule50) wdl = wdl > WDLDraw ? WDLWin : wdl < WDLDraw ? WDLLoss : WDLDraw; m.tbScore = WDL_to_value[wdl + 2]; } return true; } Config Tablebases::rank_root_moves(const OptionsMap& options, Position& pos, Search::RootMoves& rootMoves, bool rankDTZ, const std::function& time_abort) { Config config; if (rootMoves.empty()) return config; config.rootInTB = false; config.useRule50 = bool(options["Syzygy50MoveRule"]); config.probeDepth = int(options["SyzygyProbeDepth"]); config.cardinality = int(options["SyzygyProbeLimit"]); bool dtz_available = true; // Tables with fewer pieces than SyzygyProbeLimit are searched with // probeDepth == DEPTH_ZERO if (config.cardinality > MaxCardinality) { config.cardinality = MaxCardinality; config.probeDepth = 0; } if (config.cardinality >= popcount(pos.pieces()) && !pos.can_castle(ANY_CASTLING)) { // Rank moves using DTZ tables, bail out if time_abort flags zeitnot config.rootInTB = root_probe(pos, rootMoves, options["Syzygy50MoveRule"], rankDTZ, time_abort); if (!config.rootInTB && !time_abort()) { // DTZ tables are missing; try to rank moves using WDL tables dtz_available = false; config.rootInTB = root_probe_wdl(pos, rootMoves, options["Syzygy50MoveRule"]); } } if (config.rootInTB) { // Sort moves according to TB rank std::stable_sort( rootMoves.begin(), rootMoves.end(), [](const Search::RootMove& a, const Search::RootMove& b) { return a.tbRank > b.tbRank; }); // Probe during search only if DTZ is not available and we are winning if (dtz_available || 
rootMoves[0].tbScore <= VALUE_DRAW) config.cardinality = 0; } else { // Clean up if root_probe() and root_probe_wdl() have failed for (auto& m : rootMoves) m.tbRank = 0; } return config; } } // namespace Stockfish ================================================ FILE: src/syzygy/tbprobe.h ================================================ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. Stockfish is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . 
*/ #ifndef TBPROBE_H #define TBPROBE_H #include #include #include namespace Stockfish { class Position; class OptionsMap; using Depth = int; namespace Search { struct RootMove; using RootMoves = std::vector; } } namespace Stockfish::Tablebases { struct Config { int cardinality = 0; bool rootInTB = false; bool useRule50 = false; Depth probeDepth = 0; }; enum WDLScore { WDLLoss = -2, // Loss WDLBlessedLoss = -1, // Loss, but draw under 50-move rule WDLDraw = 0, // Draw WDLCursedWin = 1, // Win, but draw under 50-move rule WDLWin = 2, // Win }; // Possible states after a probing operation enum ProbeState { FAIL = 0, // Probe failed (missing file table) OK = 1, // Probe successful CHANGE_STM = -1, // DTZ should check the other side ZEROING_BEST_MOVE = 2 // Best move zeroes DTZ (capture or pawn move) }; extern int MaxCardinality; void init(const std::string& paths); WDLScore probe_wdl(Position& pos, ProbeState* result); int probe_dtz(Position& pos, ProbeState* result); bool root_probe(Position& pos, Search::RootMoves& rootMoves, bool rule50, bool rankDTZ, const std::function& time_abort); bool root_probe_wdl(Position& pos, Search::RootMoves& rootMoves, bool rule50); Config rank_root_moves( const OptionsMap& options, Position& pos, Search::RootMoves& rootMoves, bool rankDTZ = false, const std::function& time_abort = []() { return false; }); } // namespace Stockfish::Tablebases #endif ================================================ FILE: src/thread.cpp ================================================ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. 
Stockfish is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #include "thread.h" #include #include #include #include #include #include #include #include #include "bitboard.h" #include "history.h" #include "memory.h" #include "movegen.h" #include "search.h" #include "syzygy/tbprobe.h" #include "timeman.h" #include "types.h" #include "uci.h" #include "ucioption.h" namespace Stockfish { // Constructor launches the thread and waits until it goes to sleep // in idle_loop(). Note that 'searching' and 'exit' should be already set. Thread::Thread(Search::SharedState& sharedState, std::unique_ptr sm, size_t n, size_t numaN, size_t totalNumaCount, OptionalThreadToNumaNodeBinder binder) : idx(n), idxInNuma(numaN), totalNuma(totalNumaCount), nthreads(sharedState.options["Threads"]), stdThread(&Thread::idle_loop, this) { wait_for_search_finished(); run_custom_job([this, &binder, &sharedState, &sm, n]() { // Use the binder to [maybe] bind the threads to a NUMA node before doing // the Worker allocation. Ideally we would also allocate the SearchManager // here, but that's minor. this->numaAccessToken = binder(); this->worker = make_unique_large_page( sharedState, std::move(sm), n, idxInNuma, totalNuma, this->numaAccessToken); }); wait_for_search_finished(); } // Destructor wakes up the thread in idle_loop() and waits // for its termination. Thread should be already waiting. 
Thread::~Thread() { assert(!searching); exit = true; start_searching(); stdThread.join(); } // Wakes up the thread that will start the search void Thread::start_searching() { assert(worker != nullptr); run_custom_job([this]() { worker->start_searching(); }); } // Clears the histories for the thread worker (usually before a new game) void Thread::clear_worker() { assert(worker != nullptr); run_custom_job([this]() { worker->clear(); }); } // Blocks on the condition variable until the thread has finished searching void Thread::wait_for_search_finished() { std::unique_lock lk(mutex); cv.wait(lk, [&] { return !searching; }); } // Launching a function in the thread void Thread::run_custom_job(std::function f) { { std::unique_lock lk(mutex); cv.wait(lk, [&] { return !searching; }); jobFunc = std::move(f); searching = true; } cv.notify_one(); } void Thread::ensure_network_replicated() { worker->ensure_network_replicated(); } // Thread gets parked here, blocked on the condition variable // when the thread has no work to do. void Thread::idle_loop() { while (true) { std::unique_lock lk(mutex); searching = false; cv.notify_one(); // Wake up anyone waiting for search finished cv.wait(lk, [&] { return searching; }); if (exit) return; std::function job = std::move(jobFunc); jobFunc = nullptr; lk.unlock(); if (job) job(); } } Search::SearchManager* ThreadPool::main_manager() { return main_thread()->worker->main_manager(); } uint64_t ThreadPool::nodes_searched() const { return accumulate(&Search::Worker::nodes); } uint64_t ThreadPool::tb_hits() const { return accumulate(&Search::Worker::tbHits); } static size_t next_power_of_two(uint64_t count) { return count > 1 ? (2ULL << msb(count - 1)) : 1; } // Creates/destroys threads to match the requested number. // Created and launched threads will immediately go to sleep in idle_loop. // Upon resizing, threads are recreated to allow for binding if necessary. 
void ThreadPool::set(const NumaConfig& numaConfig,
                     Search::SharedState sharedState,
                     const Search::SearchManager::UpdateContext& updateContext) {

    if (threads.size() > 0)  // destroy any existing thread(s)
    {
        main_thread()->wait_for_search_finished();

        threads.clear();

        boundThreadToNumaNode.clear();
    }

    const size_t requested = sharedState.options["Threads"];

    if (requested > 0)  // create new thread(s)
    {
        // Binding threads may be problematic when there's multiple NUMA nodes and
        // multiple Stockfish instances running. In particular, if each instance
        // runs a single thread then they would all be mapped to the first NUMA node.
        // This is undesirable, and so the default behaviour (i.e. when the user does not
        // change the NumaConfig UCI setting) is to not bind the threads to processors
        // unless we know for sure that we span NUMA nodes and replication is required.
        const std::string numaPolicy(sharedState.options["NumaPolicy"]);
        const bool doBindThreads = [&]() {
            if (numaPolicy == "none")
                return false;

            if (numaPolicy == "auto")
                return numaConfig.suggests_binding_threads(requested);

            // numaPolicy == "system", or explicitly set by the user
            return true;
        }();

        // Number of threads per NUMA node.
        // NOTE(review): the template arguments of 'std::map' and 'std::vector{}'
        // (and of the make_unique calls below) are missing in this text.
        std::map counts;
        boundThreadToNumaNode = doBindThreads
                                ? numaConfig.distribute_threads_among_numa_nodes(requested)
                                : std::vector{};

        if (boundThreadToNumaNode.empty())
            counts[0] = requested;  // Pretend all threads are part of numa node 0
        else
        {
            for (size_t i = 0; i < boundThreadToNumaNode.size(); ++i)
                counts[boundThreadToNumaNode[i]]++;
        }

        // Rebuild the shared histories: one entry per node in use, constructed from
        // the node's thread count rounded by next_power_of_two()
        sharedState.sharedHistories.clear();
        for (auto pair : counts)
        {
            NumaIndex numaIndex = pair.first;
            uint64_t count = pair.second;
            auto f = [&]() {
                sharedState.sharedHistories.try_emplace(numaIndex, next_power_of_two(count));
            };
            if (doBindThreads)
                numaConfig.execute_on_numa_node(numaIndex, f);
            else
                f();
        }

        // Keep the per-node totals; 'counts' is reused below as a running
        // per-node index while the threads are created
        auto threadsPerNode = counts;
        counts.clear();

        while (threads.size() < requested)
        {
            const size_t threadId = threads.size();
            const NumaIndex numaId = doBindThreads ? boundThreadToNumaNode[threadId] : 0;
            auto create_thread = [&]() {
                // Thread 0 is constructed with a manager built from updateContext,
                // every other thread with a default-constructed one
                auto manager = threadId == 0
                               ? std::unique_ptr(
                                   std::make_unique(updateContext))
                               : std::make_unique();

                // When not binding threads we want to force all access to happen
                // from the same NUMA node, because in case of NUMA replicated memory
                // accesses we don't want to trash cache in case the threads get scheduled
                // on the same NUMA node.
                auto binder = doBindThreads ? OptionalThreadToNumaNodeBinder(numaConfig, numaId)
                                            : OptionalThreadToNumaNodeBinder(numaId);

                threads.emplace_back(std::make_unique(sharedState, std::move(manager), threadId,
                                                      counts[numaId]++, threadsPerNode[numaId],
                                                      binder));
            };

            // Ensure the worker thread inherits the intended NUMA affinity at creation.
            if (doBindThreads)
                numaConfig.execute_on_numa_node(numaId, create_thread);
            else
                create_thread();
        }

        clear();

        main_thread()->wait_for_search_finished();
    }
}


// Sets threadPool data to initial values
void ThreadPool::clear() {
    if (threads.size() == 0)
        return;

    // Queue the clear job on every thread first, then wait for all of them
    for (auto&& th : threads)
        th->clear_worker();

    for (auto&& th : threads)
        th->wait_for_search_finished();

    // These two affect the time taken on the first move of a game:
    main_manager()->bestPreviousAverageScore = VALUE_INFINITE;
    main_manager()->previousTimeReduction = 0.85;

    main_manager()->callsCnt = 0;
    main_manager()->bestPreviousScore = VALUE_INFINITE;
    main_manager()->originalTimeAdjust = -1;
    main_manager()->tm.clear();
}

// Queues f on the thread with index threadId; does not wait for it to finish
void ThreadPool::run_on_thread(size_t threadId, std::function f) {
    assert(threads.size() > threadId);
    threads[threadId]->run_custom_job(std::move(f));
}

// Blocks until the thread with index threadId is idle again
void ThreadPool::wait_on_thread(size_t threadId) {
    assert(threads.size() > threadId);
    threads[threadId]->wait_for_search_finished();
}

// Number of threads currently held by the pool
size_t ThreadPool::num_threads() const { return threads.size(); }


// Wakes up main thread waiting in idle_loop() and returns immediately.
// Main thread will wake up other threads and start the search.
void ThreadPool::start_thinking(const OptionsMap& options,
                                Position& pos,
                                StateListPtr& states,
                                Search::LimitsType limits) {

    main_thread()->wait_for_search_finished();

    main_manager()->stopOnPonderhit = stop = false;
    main_manager()->ponder = limits.ponderMode;

    increaseDepth = true;

    Search::RootMoves rootMoves;
    // NOTE(review): 'MoveList(pos)' has lost its template argument in this text
    const auto legalmoves = MoveList(pos);

    // Keep only those requested 'searchmoves' that are found in legalmoves
    for (const auto& uciMove : limits.searchmoves)
    {
        auto move = UCIEngine::to_move(pos, uciMove);

        if (std::find(legalmoves.begin(), legalmoves.end(), move) != legalmoves.end())
            rootMoves.emplace_back(move);
    }

    // No (valid) restriction given: search every move in legalmoves
    if (rootMoves.empty())
        for (const auto& m : legalmoves)
            rootMoves.emplace_back(m);

    Tablebases::Config tbConfig = Tablebases::rank_root_moves(options, pos, rootMoves);

    // After ownership transfer 'states' becomes empty, so if we stop the search
    // and call 'go' again without setting a new position states.get() == nullptr.
    assert(states.get() || setupStates.get());

    if (states.get())
        setupStates = std::move(states);  // Ownership transfer, states is now empty

    // We use Position::set() to set root position across threads. But there are
    // some StateInfo fields (previous, pliesFromNull, capturedPiece) that cannot
    // be deduced from a fen string, so set() clears them and they are set from
    // setupStates->back() later. The rootState is per thread, earlier states are
    // shared since they are read-only.
    for (auto&& th : threads)
    {
        // The job captures by reference; the wait loop below keeps the captured
        // locals alive until every job has finished.
        th->run_custom_job([&]() {
            th->worker->limits = limits;
            th->worker->nodes = th->worker->tbHits = th->worker->bestMoveChanges = 0;
            th->worker->nmpMinPly = 0;
            th->worker->rootDepth = th->worker->completedDepth = 0;
            th->worker->rootMoves = rootMoves;
            th->worker->rootPos.set(pos.fen(), pos.is_chess960(), &th->worker->rootState);
            th->worker->rootState = setupStates->back();
            th->worker->tbConfig = tbConfig;
        });
    }

    for (auto&& th : threads)
        th->wait_for_search_finished();

    main_thread()->start_searching();
}

// Picks the thread whose first root move should be reported. Threads vote for
// their first PV move with a weight built from score and completed depth;
// proven wins and proven losses are handled before the vote is consulted.
Thread* ThreadPool::get_best_thread() const {

    Thread* bestThread = threads.front().get();
    Value minScore = VALUE_NONE;

    // NOTE(review): the template arguments of 'std::unordered_map' are missing here.
    // Votes are keyed by the first PV move; the constructor argument is a bucket hint.
    std::unordered_map votes(
      2 * std::min(size(), bestThread->worker->rootMoves.size()));

    // Find the minimum score of all threads
    for (auto&& th : threads)
        minScore = std::min(minScore, th->worker->rootMoves[0].score);

    // Vote according to score and depth, and select the best thread
    auto thread_voting_value = [minScore](Thread* th) {
        return (th->worker->rootMoves[0].score - minScore + 14) * int(th->worker->completedDepth);
    };

    for (auto&& th : threads)
        votes[th->worker->rootMoves[0].pv[0]] += thread_voting_value(th.get());

    // True when the thread's first root move score is flagged as a lower or upper bound
    auto has_bound = [](const Thread* th) {
        return th->worker->rootMoves[0].scoreLowerbound || th->worker->rootMoves[0].scoreUpperbound;
    };

    for (auto&& th : threads)
    {
        const auto bestThreadScore = bestThread->worker->rootMoves[0].score;
        const auto newThreadScore = th->worker->rootMoves[0].score;

        const auto& bestThreadPV = bestThread->worker->rootMoves[0].pv;
        const auto& newThreadPV = th->worker->rootMoves[0].pv;

        const auto bestThreadMoveVote = votes[bestThreadPV[0]];
        const auto newThreadMoveVote = votes[newThreadPV[0]];

        // Aborted searches may lead to inexact win scores.
        const bool bestThreadInProvenWin = is_win(bestThreadScore) && !has_bound(bestThread);
        const bool newThreadInProvenWin = is_win(newThreadScore) && !has_bound(th.get());

        // Loss scores may be inexact only for aborted d1 searches.
        const bool bestThreadInProvenLoss = bestThreadScore != -VALUE_INFINITE
                                         && is_loss(bestThreadScore) && !has_bound(bestThread);
        const bool newThreadInProvenLoss = newThreadScore != -VALUE_INFINITE
                                        && is_loss(newThreadScore) && !has_bound(th.get());

        // We make sure not to pick a thread with truncated principal variation
        const bool betterVotingValue =
          thread_voting_value(th.get()) * int(newThreadPV.size() > 2)
          > thread_voting_value(bestThread) * int(bestThreadPV.size() > 2);

        if (bestThreadInProvenWin)
        {
            // Make sure we pick the shortest mate / TB conversion
            if (newThreadInProvenWin && newThreadScore > bestThreadScore)
                bestThread = th.get();
        }
        else if (bestThreadInProvenLoss)
        {
            // Make sure we pick the shortest mated / TB conversion
            if (newThreadInProvenLoss && newThreadScore < bestThreadScore)
                bestThread = th.get();
        }
        // Otherwise a proven result on the new thread wins outright; failing that,
        // the move vote decides, with the voting value as tie-break
        else if (newThreadInProvenWin || newThreadInProvenLoss
                 || (!is_loss(newThreadScore)
                     && (newThreadMoveVote > bestThreadMoveVote
                         || (newThreadMoveVote == bestThreadMoveVote && betterVotingValue))))
            bestThread = th.get();
    }

    return bestThread;
}


// Start non-main threads.
// Will be invoked by main thread after it has started searching.
void ThreadPool::start_searching() { for (auto&& th : threads) if (th != threads.front()) th->start_searching(); } // Wait for non-main threads void ThreadPool::wait_for_search_finished() const { for (auto&& th : threads) if (th != threads.front()) th->wait_for_search_finished(); } std::vector ThreadPool::get_bound_thread_count_by_numa_node() const { std::vector counts; if (!boundThreadToNumaNode.empty()) { NumaIndex highestNumaNode = 0; for (NumaIndex n : boundThreadToNumaNode) if (n > highestNumaNode) highestNumaNode = n; counts.resize(highestNumaNode + 1, 0); for (NumaIndex n : boundThreadToNumaNode) counts[n] += 1; } return counts; } void ThreadPool::ensure_network_replicated() { for (auto&& th : threads) th->ensure_network_replicated(); } } // namespace Stockfish ================================================ FILE: src/thread.h ================================================ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. Stockfish is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . 
*/

#ifndef THREAD_H_INCLUDED
#define THREAD_H_INCLUDED

// NOTE(review): the eight '#include' directives below carry no header name, and
// several template argument lists in this header are missing too (for example
// 'std::function f', 'LargePagePtr worker', 'std::vector> threads'). They look
// stripped by a text-extraction step; restore them from upstream before building.
#include
#include
#include
#include
#include
#include
#include
#include

#include "memory.h"
#include "numa.h"
#include "position.h"
#include "search.h"
#include "thread_win32_osx.h"

namespace Stockfish {


class OptionsMap;
using Value = int;

// Sometimes we don't want to actually bind the threads, but the recipient still
// needs to think it runs on *some* NUMA node, such that it can access structures
// that rely on NUMA node knowledge. This class encapsulates this optional process
// such that the recipient does not need to know whether the binding happened or not.
class OptionalThreadToNumaNodeBinder {
   public:
    // Non-binding variant: only remembers the node index
    OptionalThreadToNumaNodeBinder(NumaIndex n) :
        numaConfig(nullptr),
        numaId(n) {}

    // Binding variant: keeps a pointer to cfg, which must therefore outlive this object
    OptionalThreadToNumaNodeBinder(const NumaConfig& cfg, NumaIndex n) :
        numaConfig(&cfg),
        numaId(n) {}

    // Binds the calling thread to numaId when a config was supplied; otherwise
    // just builds an access token for numaId without binding anything
    NumaReplicatedAccessToken operator()() const {
        if (numaConfig != nullptr)
            return numaConfig->bind_current_thread_to_numa_node(numaId);
        else
            return NumaReplicatedAccessToken(numaId);
    }

   private:
    const NumaConfig* numaConfig;  // nullptr means "do not bind"
    NumaIndex numaId;
};

// Abstraction of a thread. It contains a pointer to the worker and a native thread.
// After construction, the native thread is started with idle_loop()
// waiting for a signal to start searching.
// When the signal is received, the thread starts searching and when
// the search is finished, it goes back to idle_loop() waiting for a new signal.
class Thread {
   public:
    Thread(Search::SharedState&,
           std::unique_ptr,
           size_t,
           size_t,
           size_t,
           OptionalThreadToNumaNodeBinder);
    virtual ~Thread();

    void idle_loop();
    void start_searching();
    void clear_worker();
    void run_custom_job(std::function f);

    void ensure_network_replicated();

    // Thread has been slightly altered to allow running custom jobs, so
    // this name is no longer correct. However, this class (and ThreadPool)
    // require further work to make them properly generic while maintaining
    // appropriate specificity regarding search, from the point of view of an
    // outside user, so renaming of this function is left for whenever that happens.
    void wait_for_search_finished();
    size_t id() const { return idx; }

    LargePagePtr worker;
    std::function jobFunc;  // Job slot handed over to idle_loop()

   private:
    std::mutex mutex;
    std::condition_variable cv;
    size_t idx, idxInNuma, totalNuma, nthreads;
    bool exit = false, searching = true;  // Set before starting std::thread
    NativeThread stdThread;
    NumaReplicatedAccessToken numaAccessToken;
};


// ThreadPool struct handles all the threads-related stuff like init, starting,
// parking and, most importantly, launching a thread. All the access to threads
// is done through this class.
class ThreadPool {
   public:
    ThreadPool() {}

    ~ThreadPool() {
        // destroy any existing thread(s)
        if (threads.size() > 0)
        {
            main_thread()->wait_for_search_finished();

            threads.clear();
        }
    }

    // Neither copyable nor movable
    ThreadPool(const ThreadPool&) = delete;
    ThreadPool(ThreadPool&&) = delete;

    ThreadPool& operator=(const ThreadPool&) = delete;
    ThreadPool& operator=(ThreadPool&&) = delete;

    void start_thinking(const OptionsMap&, Position&, StateListPtr&, Search::LimitsType);
    void run_on_thread(size_t threadId, std::function f);
    void wait_on_thread(size_t threadId);
    size_t num_threads() const;
    void clear();
    void set(const NumaConfig& numaConfig,
             Search::SharedState,
             const Search::SearchManager::UpdateContext&);

    Search::SearchManager* main_manager();
    // The first thread of the pool; calls front(), so the pool must not be empty
    Thread* main_thread() const { return threads.front().get(); }
    uint64_t nodes_searched() const;
    uint64_t tb_hits() const;
    Thread* get_best_thread() const;
    void start_searching();
    void wait_for_search_finished() const;

    std::vector get_bound_thread_count_by_numa_node() const;

    void ensure_network_replicated();

    std::atomic_bool stop, increaseDepth;

    // Container-like access, forwarded to the underlying 'threads' vector
    auto cbegin() const noexcept { return threads.cbegin(); }
    auto begin() noexcept { return threads.begin(); }
    auto end() noexcept { return threads.end(); }
    auto cend() const noexcept { return threads.cend(); }
    auto size() const noexcept { return threads.size(); }
    auto empty() const noexcept { return threads.empty(); }

   private:
    StateListPtr setupStates;
    std::vector> threads;
    std::vector boundThreadToNumaNode;

    // Sums the given atomic Worker member over all threads using relaxed loads
    uint64_t accumulate(std::atomic Search::Worker::* member) const {

        uint64_t sum = 0;
        for (auto&& th : threads)
            sum += (th->worker.get()->*member).load(std::memory_order_relaxed);
        return sum;
    }
};

}  // namespace Stockfish

#endif  // #ifndef THREAD_H_INCLUDED


================================================
FILE: src/thread_win32_osx.h
================================================
/*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
  Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.

  Stockfish is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

#ifndef THREAD_WIN32_OSX_H_INCLUDED
#define THREAD_WIN32_OSX_H_INCLUDED

#include

// On OSX threads other than the main thread are created with a reduced stack
// size of 512KB by default, this is too low for deep searches, which require
// somewhat more than 1MB stack, so adjust it to TH_STACK_SIZE.
// The implementation calls pthread_create() with the stack size parameter
// equal to the Linux 8MB default, on platforms that support it.
#if defined(__APPLE__) || defined(__MINGW32__) || defined(__MINGW64__) || defined(USE_PTHREADS) #include #include namespace Stockfish { class NativeThread { pthread_t thread; static constexpr size_t TH_STACK_SIZE = 8 * 1024 * 1024; public: template explicit NativeThread(Function&& fun, Args&&... args) { auto func = new std::function( std::bind(std::forward(fun), std::forward(args)...)); pthread_attr_t attr_storage, *attr = &attr_storage; pthread_attr_init(attr); pthread_attr_setstacksize(attr, TH_STACK_SIZE); auto start_routine = [](void* ptr) -> void* { auto f = reinterpret_cast*>(ptr); // Call the function (*f)(); delete f; return nullptr; }; pthread_create(&thread, attr, start_routine, func); } void join() { pthread_join(thread, nullptr); } }; } // namespace Stockfish #else // Default case: use STL classes namespace Stockfish { using NativeThread = std::thread; } // namespace Stockfish #endif #endif // #ifndef THREAD_WIN32_OSX_H_INCLUDED ================================================ FILE: src/timeman.cpp ================================================ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. Stockfish is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . 
*/

#include "timeman.h"

// NOTE(review): the four '#include' directives below carry no header name; they
// look stripped by a text-extraction step. Restore them from upstream before building.
#include
#include
#include
#include

#include "search.h"
#include "ucioption.h"

namespace Stockfish {

// Accessors for the two limits computed by init()
TimePoint TimeManagement::optimum() const { return optimumTime; }
TimePoint TimeManagement::maximum() const { return maximumTime; }

// Resets the node budget so that the next init() in 'nodes as time' mode recomputes it
void TimeManagement::clear() {
    availableNodes = -1;  // When in 'nodes as time' mode
}

// Subtracts the given nodes from the remaining node budget, clamping at zero
void TimeManagement::advance_nodes_time(std::int64_t nodes) {
    assert(useNodesTime);
    availableNodes = std::max(int64_t(0), availableNodes - nodes);
}

// Called at the beginning of the search and calculates
// the bounds of time allowed for the current game ply. We currently support:
//      1) x basetime (+ z increment)
//      2) x moves in y seconds (+ z increment)
//
// Note that 'limits' is modified in 'nodes as time' mode, and that
// 'originalTimeAdjust' is written once, while it is still negative.
void TimeManagement::init(Search::LimitsType& limits,
                          Color us,
                          int ply,
                          const OptionsMap& options,
                          double& originalTimeAdjust) {
    TimePoint npmsec = TimePoint(options["nodestime"]);

    // If we have no time, we don't need to fully initialize TM.
    // startTime is used by movetime and useNodesTime is used in elapsed calls.
    startTime = limits.startTime;
    useNodesTime = npmsec != 0;

    if (limits.time[us] == 0)
        return;

    TimePoint moveOverhead = TimePoint(options["Move Overhead"]);

    // optScale is a percentage of available time to use for the current move.
    // maxScale is a multiplier applied to optimumTime.
    double optScale, maxScale;

    // If we have to play in 'nodes as time' mode, then convert from time
    // to nodes, and use resulting values in time management formulas.
    // WARNING: to avoid time losses, the given npmsec (nodes per millisecond)
    // must be much lower than the real engine speed.
    if (useNodesTime)
    {
        if (availableNodes == -1)                       // Only once at game start
            availableNodes = npmsec * limits.time[us];  // Time is in msec

        // Convert from milliseconds to nodes
        limits.time[us] = TimePoint(availableNodes);
        limits.inc[us] *= npmsec;
        limits.npmsec = npmsec;
        moveOverhead *= npmsec;
    }

    // These numbers are used where multiplications, divisions or comparisons
    // with constants are involved.
    const int64_t scaleFactor = useNodesTime ? npmsec : 1;
    const TimePoint scaledTime = limits.time[us] / scaleFactor;

    // Maximum move horizon
    int centiMTG = limits.movestogo ? std::min(limits.movestogo * 100, 5000) : 5051;

    // If less than one second, gradually reduce mtg
    if (scaledTime < 1000)
        centiMTG = int(scaledTime * 5.051);

    // Make sure timeLeft is > 0 since we may use it as a divisor
    TimePoint timeLeft =
      std::max(TimePoint(1),
               limits.time[us]
                 + (limits.inc[us] * (centiMTG - 100) - moveOverhead * (200 + centiMTG)) / 100);

    // x basetime (+ z increment)
    // If there is a healthy increment, timeLeft can exceed the actual available
    // game time for the current move, so also cap to a percentage of available game time.
    if (limits.movestogo == 0)
    {
        // Extra time according to timeLeft
        if (originalTimeAdjust < 0)
            originalTimeAdjust = 0.3272 * std::log10(timeLeft) - 0.4141;

        // Calculate time constants based on current time left.
        double logTimeInSec = std::log10(scaledTime / 1000.0);
        double optConstant = std::min(0.0029869 + 0.00033554 * logTimeInSec, 0.004905);
        double maxConstant = std::max(3.3744 + 3.0608 * logTimeInSec, 3.1441);

        optScale = std::min(0.012112 + std::pow(ply + 3.22713, 0.46866) * optConstant,
                            0.19404 * limits.time[us] / timeLeft)
                 * originalTimeAdjust;

        maxScale = std::min(6.873, maxConstant + ply / 12.352);
    }

    // x moves in y seconds (+ z increment)
    else
    {
        optScale =
          std::min((0.88 + ply / 116.4) / (centiMTG / 100.0), 0.88 * limits.time[us] / timeLeft);
        maxScale = 1.3 + 0.11 * (centiMTG / 100.0);
    }

    // Limit the maximum possible time for this move
    optimumTime = TimePoint(std::max(1.0, optScale * timeLeft));
    // maximumTime is never below optimumTime
    maximumTime = TimePoint(
      std::max(double(optimumTime),
               std::min(0.8097 * limits.time[us] - moveOverhead, maxScale * optimumTime)));

    // With the "Ponder" option set, optimumTime is raised by a quarter
    if (options["Ponder"])
        optimumTime += optimumTime / 4;
}

}  // namespace Stockfish


================================================
FILE: src/timeman.h
================================================
/*
  Stockfish, a UCI chess
playing engine derived from Glaurung 2.1
  Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.

  Stockfish is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

#ifndef TIMEMAN_H_INCLUDED
#define TIMEMAN_H_INCLUDED

// NOTE(review): the '#include' below carries no header name and the 'template'
// in front of elapsed() has no parameter list; both look stripped by a
// text-extraction step. Restore them from upstream before building.
#include

#include "misc.h"

namespace Stockfish {

class OptionsMap;
enum Color : uint8_t;

namespace Search {
struct LimitsType;
}

// The TimeManagement class computes the optimal time to think depending on
// the maximum available time, the game move number, and other parameters.
class TimeManagement {
   public:
    void init(Search::LimitsType& limits,
              Color us,
              int ply,
              const OptionsMap& options,
              double& originalTimeAdjust);

    TimePoint optimum() const;
    TimePoint maximum() const;
    // In 'nodes as time' mode returns the result of calling nodes(); otherwise the
    // elapsed wall time. 'nodes' is only invoked in the former case.
    template
    TimePoint elapsed(FUNC nodes) const {
        return useNodesTime ? TimePoint(nodes()) : elapsed_time();
    }
    // now() minus the start time recorded by init()
    TimePoint elapsed_time() const { return now() - startTime; };

    void clear();
    void advance_nodes_time(std::int64_t nodes);

   private:
    TimePoint startTime;
    TimePoint optimumTime;
    TimePoint maximumTime;

    std::int64_t availableNodes = -1;     // When in 'nodes as time' mode
    bool useNodesTime = false;  // True if we are in 'nodes as time' mode
};

}  // namespace Stockfish

#endif  // #ifndef TIMEMAN_H_INCLUDED


================================================
FILE: src/tt.cpp
================================================
/*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
  Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.

  Stockfish is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

#include "tt.h"

// NOTE(review): the five '#include' directives below carry no header name; they
// look stripped by a text-extraction step. Restore them from upstream before building.
#include
#include
#include
#include
#include

#include "memory.h"
#include "misc.h"
#include "syzygy/tbprobe.h"
#include "thread.h"

namespace Stockfish {


// TTEntry struct is the 10 bytes transposition table entry, defined as below:
//
// key        16 bit
// depth       8 bit
// generation  5 bit
// pv node     1 bit
// bound type  2 bit
// move       16 bit
// value      16 bit
// evaluation 16 bit
//
// These fields are in the same order as accessed by TT::probe(), since memory is fastest sequentially.
// Equally, the store order in save() matches this order.
struct TTEntry {

    // Convert internal bitfields to external types
    // (bits 0-1 of genBound8 hold the bound, bit 2 the pv flag)
    TTData read() const {
        return TTData{Move(move16),           Value(value16),
                      Value(eval16),          Depth(depth8 + DEPTH_ENTRY_OFFSET),
                      Bound(genBound8 & 0x3), bool(genBound8 & 0x4)};
    }

    bool is_occupied() const;
    void save(Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev, uint8_t generation8);
    // The returned age is a multiple of TranspositionTable::GENERATION_DELTA
    uint8_t relative_age(const uint8_t generation8) const;

   private:
    friend class TranspositionTable;

    uint16_t key16;      // low 16 bits of the position key
    uint8_t depth8;      // depth minus DEPTH_ENTRY_OFFSET; zero means "empty"
    uint8_t genBound8;   // generation (upper 5 bits), pv flag, bound (lower 2 bits)
    Move move16;
    int16_t value16;
    int16_t eval16;
};

// `genBound8` is where most of the details are. We use the following constants to manipulate 5 leading generation bits
// and 3 trailing miscellaneous bits.

// These bits are reserved for other things.
static constexpr unsigned GENERATION_BITS = 3;
// increment for generation field
static constexpr int GENERATION_DELTA = (1 << GENERATION_BITS);
// cycle length
static constexpr int GENERATION_CYCLE = 255 + GENERATION_DELTA;
// mask to pull out generation number
static constexpr int GENERATION_MASK = (0xFF << GENERATION_BITS) & 0xFF;

// DEPTH_ENTRY_OFFSET exists because 1) we use `bool(depth8)` as the occupancy check, but
// 2) we need to store negative depths for QS. (`depth8` is the only field with "spare bits":
// we sacrifice the ability to store depths greater than 1<<8 less the offset, as asserted in `save`.)
bool TTEntry::is_occupied() const { return bool(depth8); }

// Populates the TTEntry with a new node's data, possibly
// overwriting an old position. The update is not atomic and can be racy.
void TTEntry::save(
  Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev, uint8_t generation8) {

    // Preserve the old ttmove if we don't have a new one
    if (m || uint16_t(k) != key16)
        move16 = m;

    // Overwrite less valuable entries (cheapest checks first)
    // The entry is rewritten when the bound is exact, the slot holds another key,
    // the new depth (plus a bonus for pv) exceeds the stored depth minus 4, or the
    // stored entry comes from a different generation.
    if (b == BOUND_EXACT || uint16_t(k) != key16 || d - DEPTH_ENTRY_OFFSET + 2 * pv > depth8 - 4
        || relative_age(generation8))
    {
        // The biased depth must fit into depth8 and must not be 0 ("empty")
        assert(d > DEPTH_ENTRY_OFFSET);
        assert(d < 256 + DEPTH_ENTRY_OFFSET);

        key16 = uint16_t(k);
        depth8 = uint8_t(d - DEPTH_ENTRY_OFFSET);
        genBound8 = uint8_t(generation8 | uint8_t(pv) << 2 | b);
        value16 = int16_t(v);
        eval16 = int16_t(ev);
    }
}


uint8_t TTEntry::relative_age(const uint8_t generation8) const {
    // Due to our packed storage format for generation and its cyclic
    // nature we add GENERATION_CYCLE (256 is the modulus, plus what
    // is needed to keep the unrelated lowest n bits from affecting
    // the result) to calculate the entry age correctly even after
    // generation8 overflows into the next cycle.
    return (GENERATION_CYCLE + generation8 - genBound8) & GENERATION_MASK;
}


// TTWriter is but a very thin wrapper around the pointer
TTWriter::TTWriter(TTEntry* tte) :
    entry(tte) {}

// Forwards all arguments unchanged to TTEntry::save() on the wrapped entry
void TTWriter::write(
  Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev, uint8_t generation8) {
    entry->save(k, v, pv, b, d, m, ev, generation8);
}


// A TranspositionTable is an array of Cluster, of size clusterCount. Each cluster consists of ClusterSize number
// of TTEntry. Each non-empty TTEntry contains information on exactly one position. The size of a Cluster should
// divide the size of a cache line for best performance, as the cacheline is prefetched when possible.

static constexpr int ClusterSize = 3;

struct Cluster {
    TTEntry entry[ClusterSize];
    char padding[2];  // Pad to 32 bytes
};

static_assert(sizeof(Cluster) == 32, "Suboptimal Cluster size");


// Sets the size of the transposition table,
// measured in megabytes.
// Transposition table consists
// of clusters and each cluster consists of ClusterSize number of TTEntry.
// The previous table is freed before the new one is allocated; if the
// allocation fails the process exits with EXIT_FAILURE.
void TranspositionTable::resize(size_t mbSize, ThreadPool& threads) {
    aligned_large_pages_free(table);

    clusterCount = mbSize * 1024 * 1024 / sizeof(Cluster);

    // NOTE(review): the target type of this static_cast is missing in this text
    table = static_cast(aligned_large_pages_alloc(clusterCount * sizeof(Cluster)));

    if (!table)
    {
        std::cerr << "Failed to allocate " << mbSize << "MB for transposition table." << std::endl;
        exit(EXIT_FAILURE);
    }

    clear(threads);
}


// Initializes the entire transposition table to zero,
// in a multi-threaded way.
void TranspositionTable::clear(ThreadPool& threads) {
    generation8 = 0;
    const size_t threadCount = threads.num_threads();

    for (size_t i = 0; i < threadCount; ++i)
    {
        threads.run_on_thread(i, [this, i, threadCount]() {
            // Each thread will zero its part of the hash table
            // (the last thread also takes the remainder of the division)
            const size_t stride = clusterCount / threadCount;
            const size_t start = stride * i;
            const size_t len = i + 1 != threadCount ? stride : clusterCount - start;

            std::memset(&table[start], 0, len * sizeof(Cluster));
        });
    }

    // Do not return before every slice has been zeroed
    for (size_t i = 0; i < threadCount; ++i)
        threads.wait_on_thread(i);
}


// Returns an approximation of the hashtable
// occupation during a search. The hash is x permill full, as per UCI protocol.
// Only counts entries which match the current generation.
// The estimate is taken from the first 1000 clusters only, so the table is
// expected to hold at least that many.
int TranspositionTable::hashfull(int maxAge) const {
    // relative_age() is expressed in steps of GENERATION_DELTA, so scale maxAge to match
    int maxAgeInternal = maxAge << GENERATION_BITS;
    int cnt = 0;
    for (int i = 0; i < 1000; ++i)
        for (int j = 0; j < ClusterSize; ++j)
            cnt += table[i].entry[j].is_occupied()
                && table[i].entry[j].relative_age(generation8) <= maxAgeInternal;

    // 1000 clusters were sampled, so entries per cluster-slot gives permille
    return cnt / ClusterSize;
}


void TranspositionTable::new_search() {
    // increment by delta to keep lower bits as is
    generation8 += GENERATION_DELTA;
}


// Returns the current generation value, as advanced by new_search()
uint8_t TranspositionTable::generation() const { return generation8; }


// Looks up the current position in the transposition
// table. It returns true if the position is found.
// Otherwise, it returns false and a pointer to an empty or least valuable TTEntry // to be replaced later. The replace value of an entry is calculated as its depth // minus 8 times its relative age. TTEntry t1 is considered more valuable than // TTEntry t2 if its replace value is greater than that of t2. std::tuple TranspositionTable::probe(const Key key) const { TTEntry* const tte = first_entry(key); const uint16_t key16 = uint16_t(key); // Use the low 16 bits as key inside the cluster for (int i = 0; i < ClusterSize; ++i) if (tte[i].key16 == key16) // This gap is the main place for read races. // After `read()` completes that copy is final, but may be self-inconsistent. return {tte[i].is_occupied(), tte[i].read(), TTWriter(&tte[i])}; // Find an entry to be replaced according to the replacement strategy TTEntry* replace = tte; for (int i = 1; i < ClusterSize; ++i) if (replace->depth8 - replace->relative_age(generation8) > tte[i].depth8 - tte[i].relative_age(generation8)) replace = &tte[i]; return {false, TTData{Move::none(), VALUE_NONE, VALUE_NONE, DEPTH_ENTRY_OFFSET, BOUND_NONE, false}, TTWriter(replace)}; } TTEntry* TranspositionTable::first_entry(const Key key) const { return &table[mul_hi64(key, clusterCount)].entry[0]; } } // namespace Stockfish ================================================ FILE: src/tt.h ================================================ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. Stockfish is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
  You should have received a copy of the GNU General Public License
  along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

#ifndef TT_H_INCLUDED
#define TT_H_INCLUDED

// NOTE(review): the three '#include' directives below carry no header name; they
// look stripped by a text-extraction step. Restore them from upstream before building.
#include
#include
#include

#include "memory.h"
#include "types.h"

namespace Stockfish {

class ThreadPool;
struct TTEntry;
struct Cluster;

// There is only one global hash table for the engine and all its threads. For chess in particular, we even allow racy
// updates between threads to and from the TT, as taking the time to synchronize access would cost thinking time and
// thus elo. As a hash table, collisions are possible and may cause chess playing issues (bizarre blunders, faulty mate
// reports, etc). Fixing these also loses elo; however such risk decreases quickly with larger TT size.
//
// `probe` is the primary method: given a board position, we lookup its entry in the table, and return a tuple of:
//   1) whether the entry already has this position
//   2) a copy of the prior data (if any) (may be inconsistent due to read races)
//   3) a writer object to this entry
// The copied data and the writer are separated to maintain clear boundaries between local vs global objects.


// A copy of the data already in the entry (possibly collided). `probe` may be racy, resulting in inconsistent data.
struct TTData {
    Move move;
    Value value, eval;
    Depth depth;
    Bound bound;
    bool is_pv;

    // Every field has to be supplied explicitly
    TTData() = delete;

    // clang-format off
    TTData(Move m, Value v, Value ev, Depth d, Bound b, bool pv) :
        move(m),
        value(v),
        eval(ev),
        depth(d),
        bound(b),
        is_pv(pv) {};
    // clang-format on
};


// This is used to make racy writes to the global TT.
struct TTWriter { public: void write(Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev, uint8_t generation8); private: friend class TranspositionTable; TTEntry* entry; TTWriter(TTEntry* tte); }; class TranspositionTable { public: ~TranspositionTable() { aligned_large_pages_free(table); } void resize(size_t mbSize, ThreadPool& threads); // Set TT size void clear(ThreadPool& threads); // Re-initialize memory, multithreaded int hashfull(int maxAge = 0) const; // Approximate what fraction of entries (permille) have been written to during this root search void new_search(); // This must be called at the beginning of each root search to track entry aging uint8_t generation() const; // The current age, used when writing new data to the TT std::tuple probe(const Key key) const; // The main method, whose retvals separate local vs global objects TTEntry* first_entry(const Key key) const; // This is the hash function; its only external use is memory prefetching. private: friend struct TTEntry; size_t clusterCount; Cluster* table = nullptr; uint8_t generation8 = 0; // Size must be not bigger than TTEntry::genBound8 }; } // namespace Stockfish #endif // #ifndef TT_H_INCLUDED ================================================ FILE: src/tune.cpp ================================================ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. Stockfish is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with this program. If not, see . */ #include "tune.h" #include #include #include #include #include #include #include "ucioption.h" using std::string; namespace Stockfish { bool Tune::update_on_last; const Option* LastOption = nullptr; OptionsMap* Tune::options; namespace { std::map TuneResults; std::optional on_tune(const Option& o) { if (!Tune::update_on_last || LastOption == &o) Tune::read_options(); return std::nullopt; } } void Tune::make_option(OptionsMap* opts, const string& n, int v, const SetRange& r) { // Do not generate option when there is nothing to tune (ie. min = max) if (r(v).first == r(v).second) return; if (TuneResults.count(n)) v = TuneResults[n]; opts->add(n, Option(v, r(v).first, r(v).second, on_tune)); LastOption = &((*opts)[n]); // Print formatted parameters, ready to be copy-pasted in Fishtest std::cout << n << "," // << v << "," // << r(v).first << "," // << r(v).second << "," // << (r(v).second - r(v).first) / 20.0 << "," // << "0.0020" << std::endl; } string Tune::next(string& names, bool pop) { string name; do { string token = names.substr(0, names.find(',')); if (pop) names.erase(0, token.size() + 1); std::stringstream ws(token); name += (ws >> token, token); // Remove trailing whitespace } while (std::count(name.begin(), name.end(), '(') - std::count(name.begin(), name.end(), ')')); return name; } template<> void Tune::Entry::init_option() { make_option(options, name, value, range); } template<> void Tune::Entry::read_option() { if (options->count(name)) value = int((*options)[name]); } // Instead of a variable here we have a PostUpdate function: just call it template<> void Tune::Entry::init_option() {} template<> void Tune::Entry::read_option() { value(); } } // namespace Stockfish // Init options with tuning session results instead of default values. Useful to // get correct bench signature after a tuning session or to test tuned values. 
// Just copy fishtest tuning results in a result.txt file and extract the // values with: // // cat results.txt | sed 's/^param: \([^,]*\), best: \([^,]*\).*/ TuneResults["\1"] = int(round(\2));/' // // Then paste the output below, as the function body namespace Stockfish { void Tune::read_results() { /* ...insert your values here... */ } } // namespace Stockfish ================================================ FILE: src/tune.h ================================================ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. Stockfish is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #ifndef TUNE_H_INCLUDED #define TUNE_H_INCLUDED #include #include #include #include // IWYU pragma: keep #include #include namespace Stockfish { class OptionsMap; using Range = std::pair; // Option's min-max values using RangeFun = Range(int); // Default Range function, to calculate Option's min-max values inline Range default_range(int v) { return v > 0 ? Range(0, 2 * v) : Range(2 * v, 0); } struct SetRange { explicit SetRange(RangeFun f) : fun(f) {} SetRange(int min, int max) : fun(nullptr), range(min, max) {} Range operator()(int v) const { return fun ? fun(v) : range; } RangeFun* fun; Range range; }; #define SetDefaultRange SetRange(default_range) // Tune class implements the 'magic' code that makes the setup of a fishtest tuning // session as easy as it can be. 
// Mainly you have just to remove const qualifiers
// from the variables you want to tune and flag them for tuning, so if you have:
//
//   const Value myValue[][2] = { { V(100), V(20) }, { V(7), V(78) } };
//
// If you have a my_post_update() function to run after values have been updated,
// and a my_range() function to set custom Option's min-max values, then you just
// remove the 'const' qualifiers and write somewhere below in the file:
//
//   TUNE(SetRange(my_range), myValue, my_post_update);
//
// You can also set the range directly, and restore the default at the end
//
//   TUNE(SetRange(-100, 100), myValue, SetDefaultRange);
//
// In case the update function is slow and you have many parameters, you can add:
//
//   UPDATE_ON_LAST();
//
// And the values update, including post update function call, will be done only
// once, after the engine receives the last UCI option, that is the one defined
// and created as the last one, so the GUI should send the options in the same
// order in which they have been defined.
class Tune { using PostUpdate = void(); // Post-update function Tune() { read_results(); } Tune(const Tune&) = delete; void operator=(const Tune&) = delete; void read_results(); static Tune& instance() { static Tune t; return t; } // Singleton // Use polymorphism to accommodate Entry of different types in the same vector struct EntryBase { virtual ~EntryBase() = default; virtual void init_option() = 0; virtual void read_option() = 0; }; template struct Entry: public EntryBase { static_assert(!std::is_const_v, "Parameter cannot be const!"); static_assert(std::is_same_v || std::is_same_v, "Parameter type not supported!"); Entry(const std::string& n, T& v, const SetRange& r) : name(n), value(v), range(r) {} void operator=(const Entry&) = delete; // Because 'value' is a reference void init_option() override; void read_option() override; std::string name; T& value; SetRange range; }; // Our facility to fill the container, each Entry corresponds to a parameter // to tune. We use variadic templates to deal with an unspecified number of // entries, each one of a possible different type. static std::string next(std::string& names, bool pop = true); int add(const SetRange&, std::string&&) { return 0; } template int add(const SetRange& range, std::string&& names, T& value, Args&&... args) { list.push_back(std::unique_ptr(new Entry(next(names), value, range))); return add(range, std::move(names), args...); } // Template specialization for arrays: recursively handle multi-dimensional arrays template int add(const SetRange& range, std::string&& names, T (&value)[N], Args&&... args) { for (size_t i = 0; i < N; i++) add(range, next(names, i == N - 1) + "[" + std::to_string(i) + "]", value[i]); return add(range, std::move(names), args...); } // Template specialization for SetRange template int add(const SetRange&, std::string&& names, SetRange& value, Args&&... 
args) { return add(value, (next(names), std::move(names)), args...); } static void make_option(OptionsMap* options, const std::string& n, int v, const SetRange& r); std::vector> list; public: template static int add(const std::string& names, Args&&... args) { return instance().add(SetDefaultRange, names.substr(1, names.size() - 2), args...); // Remove trailing parenthesis } static void init(OptionsMap& o) { options = &o; for (auto& e : instance().list) e->init_option(); read_options(); } // Deferred, due to UCIEngine::Options access static void read_options() { for (auto& e : instance().list) e->read_option(); } static bool update_on_last; static OptionsMap* options; }; template constexpr void tune_check_args(Args&&...) { static_assert((!std::is_fundamental_v && ...), "TUNE macro arguments wrong"); } // Some macro magic :-) we define a dummy int variable that the compiler initializes calling Tune::add() #define STRINGIFY(x) #x #define UNIQUE2(x, y) x##y #define UNIQUE(x, y) UNIQUE2(x, y) // Two indirection levels to expand __LINE__ #define TUNE(...) \ int UNIQUE(p, __LINE__) = []() -> int { \ tune_check_args(__VA_ARGS__); \ return Tune::add(STRINGIFY((__VA_ARGS__)), __VA_ARGS__); \ }(); #define UPDATE_ON_LAST() bool UNIQUE(p, __LINE__) = Tune::update_on_last = true } // namespace Stockfish #endif // #ifndef TUNE_H_INCLUDED ================================================ FILE: src/types.h ================================================ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. 
Stockfish is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #ifndef TYPES_H_INCLUDED #define TYPES_H_INCLUDED // When compiling with provided Makefile (e.g. for Linux and OSX), configuration // is done automatically. To get started type 'make help'. // // When Makefile is not used (e.g. with Microsoft Visual Studio) some switches // need to be set manually: // // -DNDEBUG | Disable debugging mode. Always use this for release. // // -DNO_PREFETCH | Disable use of prefetch asm-instruction. You may need this to // | run on some very old machines. // // -DUSE_POPCNT | Add runtime support for use of popcnt asm-instruction. Works // | only in 64-bit mode and requires hardware with popcnt support. // // -DUSE_PEXT | Add runtime support for use of pext asm-instruction. Works // | only in 64-bit mode and requires hardware with pext support. 
#include #include #include #include #include "misc.h" #if defined(_MSC_VER) // Disable some silly and noisy warnings from MSVC compiler #pragma warning(disable: 4127) // Conditional expression is constant #pragma warning(disable: 4146) // Unary minus operator applied to unsigned type #pragma warning(disable: 4800) // Forcing value to bool 'true' or 'false' #endif // Predefined macros hell: // // __GNUC__ Compiler is GCC, Clang or ICX // __clang__ Compiler is Clang or ICX // __INTEL_LLVM_COMPILER Compiler is ICX // _MSC_VER Compiler is MSVC // _WIN32 Building on Windows (any) // _WIN64 Building on Windows 64 bit // Enforce minimum GCC version #if defined(__GNUC__) && !defined(__clang__) \ && (__GNUC__ < 9 || (__GNUC__ == 9 && __GNUC_MINOR__ < 3)) #error "Stockfish requires GCC 9.3 or later for correct compilation" #endif // Enforce minimum Clang version #if defined(__clang__) && (__clang_major__ < 10) #error "Stockfish requires Clang 10.0 or later for correct compilation" #endif #define ASSERT_ALIGNED(ptr, alignment) assert(reinterpret_cast(ptr) % alignment == 0) #if defined(_WIN64) && defined(_MSC_VER) // No Makefile used #include // Microsoft header for _BitScanForward64() #define IS_64BIT #endif #if defined(USE_POPCNT) && defined(_MSC_VER) #include // Microsoft header for _mm_popcnt_u64() #endif #if !defined(NO_PREFETCH) && defined(_MSC_VER) #include // Microsoft header for _mm_prefetch() #endif #if defined(USE_PEXT) #include // Header for _pext_u64() intrinsic #define pext(b, m) _pext_u64(b, m) #else #define pext(b, m) 0 #endif namespace Stockfish { #ifdef USE_POPCNT constexpr bool HasPopCnt = true; #else constexpr bool HasPopCnt = false; #endif #ifdef USE_PEXT constexpr bool HasPext = true; #else constexpr bool HasPext = false; #endif #ifdef IS_64BIT constexpr bool Is64Bit = true; #else constexpr bool Is64Bit = false; #endif using Key = uint64_t; using Bitboard = uint64_t; constexpr int MAX_MOVES = 256; constexpr int MAX_PLY = 246; enum Color : uint8_t { WHITE, 
BLACK, COLOR_NB = 2 }; enum CastlingRights : uint8_t { NO_CASTLING, WHITE_OO, WHITE_OOO = WHITE_OO << 1, BLACK_OO = WHITE_OO << 2, BLACK_OOO = WHITE_OO << 3, KING_SIDE = WHITE_OO | BLACK_OO, QUEEN_SIDE = WHITE_OOO | BLACK_OOO, WHITE_CASTLING = WHITE_OO | WHITE_OOO, BLACK_CASTLING = BLACK_OO | BLACK_OOO, ANY_CASTLING = WHITE_CASTLING | BLACK_CASTLING, CASTLING_RIGHT_NB = 16 }; enum Bound : uint8_t { BOUND_NONE, BOUND_UPPER, BOUND_LOWER, BOUND_EXACT = BOUND_UPPER | BOUND_LOWER }; // Value is used as an alias for int, this is done to differentiate between a search // value and any other integer value. The values used in search are always supposed // to be in the range (-VALUE_NONE, VALUE_NONE] and should not exceed this range. using Value = int; constexpr Value VALUE_ZERO = 0; constexpr Value VALUE_DRAW = 0; constexpr Value VALUE_NONE = 32002; constexpr Value VALUE_INFINITE = 32001; constexpr Value VALUE_MATE = 32000; constexpr Value VALUE_MATE_IN_MAX_PLY = VALUE_MATE - MAX_PLY; constexpr Value VALUE_MATED_IN_MAX_PLY = -VALUE_MATE_IN_MAX_PLY; constexpr Value VALUE_TB = VALUE_MATE_IN_MAX_PLY - 1; constexpr Value VALUE_TB_WIN_IN_MAX_PLY = VALUE_TB - MAX_PLY; constexpr Value VALUE_TB_LOSS_IN_MAX_PLY = -VALUE_TB_WIN_IN_MAX_PLY; constexpr bool is_valid(Value value) { return value != VALUE_NONE; } constexpr bool is_win(Value value) { assert(is_valid(value)); return value >= VALUE_TB_WIN_IN_MAX_PLY; } constexpr bool is_loss(Value value) { assert(is_valid(value)); return value <= VALUE_TB_LOSS_IN_MAX_PLY; } constexpr bool is_decisive(Value value) { return is_win(value) || is_loss(value); } // In the code, we make the assumption that these values // are such that non_pawn_material() can be used to uniquely // identify the material on the board. 
constexpr Value PawnValue   = 208;
constexpr Value KnightValue = 781;
constexpr Value BishopValue = 825;
constexpr Value RookValue   = 1276;
constexpr Value QueenValue  = 2538;


// clang-format off
enum PieceType : std::uint8_t {
    NO_PIECE_TYPE,      // 0
    PAWN,               // 1
    KNIGHT,             // 2
    BISHOP,             // 3
    ROOK,               // 4
    QUEEN,              // 5
    KING,               // 6
    ALL_PIECES    = 0,  // shares the value of NO_PIECE_TYPE
    PIECE_TYPE_NB = 8
};

// White pieces take the PieceType values 1..6; black pieces are the same values plus 8.
enum Piece : std::uint8_t {
    NO_PIECE,
    W_PAWN = PAWN,
    W_KNIGHT,
    W_BISHOP,
    W_ROOK,
    W_QUEEN,
    W_KING,
    B_PAWN = PAWN + 8,
    B_KNIGHT,
    B_BISHOP,
    B_ROOK,
    B_QUEEN,
    B_KING,
    PIECE_NB = 16
};
// clang-format on

// Indexed by Piece. Slots for NO_PIECE, the kings and the unused codes hold VALUE_ZERO.
constexpr Value PieceValue[PIECE_NB] = {
  // NO_PIECE, W_PAWN .. W_QUEEN, W_KING, unused
  VALUE_ZERO, PawnValue, KnightValue, BishopValue, RookValue, QueenValue, VALUE_ZERO, VALUE_ZERO,
  // unused, B_PAWN .. B_QUEEN, B_KING, unused
  VALUE_ZERO, PawnValue, KnightValue, BishopValue, RookValue, QueenValue, VALUE_ZERO, VALUE_ZERO};

using Depth = int;

// The following DEPTH_ constants are used for transposition table entries
// and quiescence search move generation stages. In regular search, the
// depth stored in the transposition table is literal: the search depth
// (effort) used to make the corresponding transposition table value. In
// quiescence search, however, the transposition table entries only store
// the current quiescence move generation stage (which should thus compare
// lower than any regular search depth).
constexpr Depth DEPTH_QS = 0;
// For transposition table entries where no searching at all was done
// (whether regular or qsearch) we use DEPTH_UNSEARCHED, which should thus
// compare lower than any quiescence or regular depth. DEPTH_ENTRY_OFFSET
// is used only for the transposition table entry occupancy check (see tt.cpp),
// and should thus be lower than DEPTH_UNSEARCHED.
constexpr Depth DEPTH_UNSEARCHED = -2; constexpr Depth DEPTH_ENTRY_OFFSET = -3; // clang-format off enum Square : uint8_t { SQ_A1, SQ_B1, SQ_C1, SQ_D1, SQ_E1, SQ_F1, SQ_G1, SQ_H1, SQ_A2, SQ_B2, SQ_C2, SQ_D2, SQ_E2, SQ_F2, SQ_G2, SQ_H2, SQ_A3, SQ_B3, SQ_C3, SQ_D3, SQ_E3, SQ_F3, SQ_G3, SQ_H3, SQ_A4, SQ_B4, SQ_C4, SQ_D4, SQ_E4, SQ_F4, SQ_G4, SQ_H4, SQ_A5, SQ_B5, SQ_C5, SQ_D5, SQ_E5, SQ_F5, SQ_G5, SQ_H5, SQ_A6, SQ_B6, SQ_C6, SQ_D6, SQ_E6, SQ_F6, SQ_G6, SQ_H6, SQ_A7, SQ_B7, SQ_C7, SQ_D7, SQ_E7, SQ_F7, SQ_G7, SQ_H7, SQ_A8, SQ_B8, SQ_C8, SQ_D8, SQ_E8, SQ_F8, SQ_G8, SQ_H8, SQ_NONE, SQUARE_ZERO = 0, SQUARE_NB = 64 }; // clang-format on enum Direction : int8_t { NORTH = 8, EAST = 1, SOUTH = -NORTH, WEST = -EAST, NORTH_EAST = NORTH + EAST, SOUTH_EAST = SOUTH + EAST, SOUTH_WEST = SOUTH + WEST, NORTH_WEST = NORTH + WEST }; enum File : uint8_t { FILE_A, FILE_B, FILE_C, FILE_D, FILE_E, FILE_F, FILE_G, FILE_H, FILE_NB }; enum Rank : uint8_t { RANK_1, RANK_2, RANK_3, RANK_4, RANK_5, RANK_6, RANK_7, RANK_8, RANK_NB }; // Keep track of what a move changes on the board (used by NNUE) struct DirtyPiece { Piece pc; // this is never allowed to be NO_PIECE Square from, to; // to should be SQ_NONE for promotions // if {add,remove}_sq is SQ_NONE, {add,remove}_pc is allowed to be // uninitialized // castling uses add_sq and remove_sq to remove and add the rook Square remove_sq, add_sq; Piece remove_pc, add_pc; }; // Keep track of what threats change on the board (used by NNUE) struct DirtyThreat { static constexpr int PcSqOffset = 0; static constexpr int ThreatenedSqOffset = 8; static constexpr int ThreatenedPcOffset = 16; static constexpr int PcOffset = 20; DirtyThreat() { /* don't initialize data */ } DirtyThreat(uint32_t raw) : data(raw) {} DirtyThreat(Piece pc, Piece threatened_pc, Square pc_sq, Square threatened_sq, bool add) { data = (uint32_t(add) << 31) | (pc << PcOffset) | (threatened_pc << ThreatenedPcOffset) | (threatened_sq << ThreatenedSqOffset) | (pc_sq << PcSqOffset); } Piece 
pc() const { return static_cast(data >> PcOffset & 0xf); } Piece threatened_pc() const { return static_cast(data >> ThreatenedPcOffset & 0xf); } Square threatened_sq() const { return static_cast(data >> ThreatenedSqOffset & 0xff); } Square pc_sq() const { return static_cast(data >> PcSqOffset & 0xff); } bool add() const { return data >> 31; } uint32_t raw() const { return data; } private: uint32_t data; }; // A piece can be involved in at most 8 outgoing attacks and 16 incoming attacks. // Moving a piece also can reveal at most 8 discovered attacks. // This implies that a non-castling move can change at most (8 + 16) * 3 + 8 = 80 features. // By similar logic, a castling move can change at most (5 + 1 + 3 + 9) * 2 = 36 features. // Thus, 80 should work as an upper bound. Finally, 16 entries are added to accommodate // unmasked vector stores near the end of the list. using DirtyThreatList = ValueList; struct DirtyThreats { DirtyThreatList list; Color us; Square prevKsq, ksq; Bitboard threatenedSqs, threateningSqs; }; #define ENABLE_INCR_OPERATORS_ON(T) \ constexpr T& operator++(T& d) { return d = T(int(d) + 1); } \ constexpr T& operator--(T& d) { return d = T(int(d) - 1); } ENABLE_INCR_OPERATORS_ON(PieceType) ENABLE_INCR_OPERATORS_ON(Square) ENABLE_INCR_OPERATORS_ON(File) ENABLE_INCR_OPERATORS_ON(Rank) #undef ENABLE_INCR_OPERATORS_ON constexpr Direction operator+(Direction d1, Direction d2) { return Direction(int(d1) + int(d2)); } constexpr Direction operator*(int i, Direction d) { return Direction(i * int(d)); } // Additional operators to add a Direction to a Square constexpr Square operator+(Square s, Direction d) { return Square(int(s) + int(d)); } constexpr Square operator-(Square s, Direction d) { return Square(int(s) - int(d)); } constexpr Square& operator+=(Square& s, Direction d) { return s = s + d; } constexpr Square& operator-=(Square& s, Direction d) { return s = s - d; } // Toggle color constexpr Color operator~(Color c) { return Color(c ^ BLACK); } // 
Swap A1 <-> A8 constexpr Square flip_rank(Square s) { return Square(s ^ SQ_A8); } // Swap A1 <-> H1 constexpr Square flip_file(Square s) { return Square(s ^ SQ_H1); } // Swap color of piece B_KNIGHT <-> W_KNIGHT constexpr Piece operator~(Piece pc) { return Piece(pc ^ 8); } constexpr CastlingRights operator&(Color c, CastlingRights cr) { return CastlingRights((c == WHITE ? WHITE_CASTLING : BLACK_CASTLING) & cr); } constexpr Value mate_in(int ply) { return VALUE_MATE - ply; } constexpr Value mated_in(int ply) { return -VALUE_MATE + ply; } constexpr Square make_square(File f, Rank r) { return Square((r << 3) + f); } constexpr Piece make_piece(Color c, PieceType pt) { return Piece((c << 3) + pt); } constexpr PieceType type_of(Piece pc) { return PieceType(pc & 7); } constexpr Color color_of(Piece pc) { assert(pc != NO_PIECE); return Color(pc >> 3); } constexpr bool is_ok(Square s) { return s >= SQ_A1 && s <= SQ_H8; } constexpr File file_of(Square s) { return File(s & 7); } constexpr Rank rank_of(Square s) { return Rank(s >> 3); } constexpr Square relative_square(Color c, Square s) { return Square(s ^ (c * 56)); } constexpr Rank relative_rank(Color c, Rank r) { return Rank(r ^ (c * 7)); } constexpr Rank relative_rank(Color c, Square s) { return relative_rank(c, rank_of(s)); } constexpr Direction pawn_push(Color c) { return c == WHITE ? 
NORTH : SOUTH; } // Based on a congruential pseudo-random number generator constexpr Key make_key(uint64_t seed) { return seed * 6364136223846793005ULL + 1442695040888963407ULL; } enum MoveType : uint16_t { NORMAL, PROMOTION = 1 << 14, EN_PASSANT = 2 << 14, CASTLING = 3 << 14 }; // A move needs 16 bits to be stored // // bit 0- 5: destination square (from 0 to 63) // bit 6-11: origin square (from 0 to 63) // bit 12-13: promotion piece type - 2 (from KNIGHT-2 to QUEEN-2) // bit 14-15: special move flag: promotion (1), en passant (2), castling (3) // NOTE: en passant bit is set only when a pawn can be captured // // Special cases are Move::none() and Move::null(). We can sneak these in because // in any normal move the destination square and origin square are always different, // but Move::none() and Move::null() have the same origin and destination square. class Move { public: Move() = default; constexpr explicit Move(std::uint16_t d) : data(d) {} constexpr Move(Square from, Square to) : data((from << 6) + to) {} template static constexpr Move make(Square from, Square to, PieceType pt = KNIGHT) { return Move(T + ((pt - KNIGHT) << 12) + (from << 6) + to); } constexpr Square from_sq() const { assert(is_ok()); return Square((data >> 6) & 0x3F); } constexpr Square to_sq() const { assert(is_ok()); return Square(data & 0x3F); } // Same as to_sq() but without assertion, for branchless code paths // where the result is masked/ignored when move is not ok constexpr Square to_sq_unchecked() const { return Square(data & 0x3F); } constexpr MoveType type_of() const { return MoveType(data & (3 << 14)); } constexpr PieceType promotion_type() const { return PieceType(((data >> 12) & 3) + KNIGHT); } constexpr bool is_ok() const { return none().data != data && null().data != data; } static constexpr Move null() { return Move(65); } static constexpr Move none() { return Move(0); } constexpr bool operator==(const Move& m) const { return data == m.data; } constexpr bool operator!=(const 
Move& m) const { return data != m.data; } constexpr explicit operator bool() const { return data != 0; } constexpr std::uint16_t raw() const { return data; } struct MoveHash { std::size_t operator()(const Move& m) const { return make_key(m.data); } }; static constexpr int FromSqShift = 6; static constexpr int ToSqShift = 0; protected: std::uint16_t data; }; template struct is_all_same { static constexpr bool value = (std::is_same_v && ...); }; template constexpr auto is_all_same_v = is_all_same::value; } // namespace Stockfish #endif // #ifndef TYPES_H_INCLUDED #include "tune.h" // Global visibility to tuning setup ================================================ FILE: src/uci.cpp ================================================ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. Stockfish is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #include "uci.h" #include #include #include #include #include #include #include #include #include #include #include #include "benchmark.h" #include "engine.h" #include "memory.h" #include "movegen.h" #include "position.h" #include "score.h" #include "search.h" #include "types.h" #include "ucioption.h" namespace Stockfish { constexpr auto BenchmarkCommand = "speedtest"; constexpr auto StartFEN = "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1"; template struct overload: Ts... { using Ts::operator()...; }; template overload(Ts...) 
-> overload; void UCIEngine::print_info_string(std::string_view str) { sync_cout_start(); for (auto& line : split(str, "\n")) { if (!is_whitespace(line)) { std::cout << "info string " << line << '\n'; } } sync_cout_end(); } UCIEngine::UCIEngine(int argc, char** argv) : engine(argv[0]), cli(argc, argv) { engine.get_options().add_info_listener([](const std::optional& str) { if (str.has_value()) print_info_string(*str); }); init_search_update_listeners(); } void UCIEngine::init_search_update_listeners() { engine.set_on_iter([](const auto& i) { on_iter(i); }); engine.set_on_update_no_moves([](const auto& i) { on_update_no_moves(i); }); engine.set_on_update_full( [this](const auto& i) { on_update_full(i, engine.get_options()["UCI_ShowWDL"]); }); engine.set_on_bestmove([](const auto& bm, const auto& p) { on_bestmove(bm, p); }); engine.set_on_verify_networks([](const auto& s) { print_info_string(s); }); } void UCIEngine::loop() { std::string token, cmd; for (int i = 1; i < cli.argc; ++i) cmd += std::string(cli.argv[i]) + " "; do { if (cli.argc == 1 && !getline(std::cin, cmd)) // Wait for an input or an end-of-file (EOF) indication cmd = "quit"; std::istringstream is(cmd); token.clear(); // Avoid a stale if getline() returns nothing or a blank line is >> std::skipws >> token; if (token == "quit" || token == "stop") engine.stop(); // The GUI sends 'ponderhit' to tell that the user has played the expected move. // So, 'ponderhit' is sent if pondering was done on the same move that the user // has played. The search should continue, but should also switch from pondering // to the normal search. 
else if (token == "ponderhit") engine.set_ponderhit(false); else if (token == "uci") { sync_cout << "id name " << engine_info(true) << "\n" << engine.get_options() << sync_endl; sync_cout << "uciok" << sync_endl; } else if (token == "setoption") setoption(is); else if (token == "go") { // send info strings after the go command is sent for old GUIs and python-chess print_info_string(engine.numa_config_information_as_string()); print_info_string(engine.thread_allocation_information_as_string()); go(is); } else if (token == "position") position(is); else if (token == "ucinewgame") engine.search_clear(); else if (token == "isready") sync_cout << "readyok" << sync_endl; // Add custom non-UCI commands, mainly for debugging purposes. // These commands must not be used during a search! else if (token == "flip") engine.flip(); else if (token == "bench") bench(is); else if (token == BenchmarkCommand) benchmark(is); else if (token == "d") sync_cout << engine.visualize() << sync_endl; else if (token == "eval") engine.trace_eval(); else if (token == "compiler") sync_cout << compiler_info() << sync_endl; else if (token == "export_net") { std::pair, std::string> files[2]; if (is >> std::skipws >> files[0].second) files[0].first = files[0].second; if (is >> std::skipws >> files[1].second) files[1].first = files[1].second; engine.save_network(files); } else if (token == "--help" || token == "help" || token == "--license" || token == "license") sync_cout << "\nStockfish is a powerful chess engine for playing and analyzing." "\nIt is released as free software licensed under the GNU GPLv3 License." "\nStockfish is normally used with a graphical user interface (GUI) and implements" "\nthe Universal Chess Interface (UCI) protocol to communicate with a GUI, an API, etc." 
"\nFor any further information, visit https://github.com/official-stockfish/Stockfish#readme" "\nor read the corresponding README.md and Copying.txt files distributed along with this program.\n" << sync_endl; else if (!token.empty() && token[0] != '#') sync_cout << "Unknown command: '" << cmd << "'. Type help for more information." << sync_endl; } while (token != "quit" && cli.argc == 1); // The command-line arguments are one-shot } Search::LimitsType UCIEngine::parse_limits(std::istream& is) { Search::LimitsType limits; std::string token; limits.startTime = now(); // The search starts as early as possible while (is >> token) if (token == "searchmoves") // Needs to be the last command on the line while (is >> token) limits.searchmoves.push_back(to_lower(token)); else if (token == "wtime") is >> limits.time[WHITE]; else if (token == "btime") is >> limits.time[BLACK]; else if (token == "winc") is >> limits.inc[WHITE]; else if (token == "binc") is >> limits.inc[BLACK]; else if (token == "movestogo") is >> limits.movestogo; else if (token == "depth") is >> limits.depth; else if (token == "nodes") is >> limits.nodes; else if (token == "movetime") is >> limits.movetime; else if (token == "mate") is >> limits.mate; else if (token == "perft") is >> limits.perft; else if (token == "infinite") limits.infinite = 1; else if (token == "ponder") limits.ponderMode = true; return limits; } void UCIEngine::go(std::istringstream& is) { Search::LimitsType limits = parse_limits(is); if (limits.perft) perft(limits); else engine.go(limits); } void UCIEngine::bench(std::istream& args) { std::string token; uint64_t num, nodes = 0, cnt = 1; uint64_t nodesSearched = 0; const auto& options = engine.get_options(); engine.set_on_update_full([&](const auto& i) { nodesSearched = i.nodes; on_update_full(i, options["UCI_ShowWDL"]); }); std::vector list = Benchmark::setup_bench(engine.fen(), args); num = count_if(list.begin(), list.end(), [](const std::string& s) { return s.find("go ") == 0 || 
s.find("eval") == 0; }); TimePoint elapsed = now(); for (const auto& cmd : list) { std::istringstream is(cmd); is >> std::skipws >> token; if (token == "go" || token == "eval") { std::cerr << "\nPosition: " << cnt++ << '/' << num << " (" << engine.fen() << ")" << std::endl; if (token == "go") { Search::LimitsType limits = parse_limits(is); if (limits.perft) nodesSearched = perft(limits); else { engine.go(limits); engine.wait_for_search_finished(); } nodes += nodesSearched; nodesSearched = 0; } else engine.trace_eval(); } else if (token == "setoption") setoption(is); else if (token == "position") position(is); else if (token == "ucinewgame") { engine.search_clear(); // search_clear may take a while elapsed = now(); } } elapsed = now() - elapsed + 1; // Ensure positivity to avoid a 'divide by zero' dbg_print(); std::cerr << "\n===========================" // << "\nTotal time (ms) : " << elapsed // << "\nNodes searched : " << nodes // << "\nNodes/second : " << 1000 * nodes / elapsed << std::endl; // reset callback, to not capture a dangling reference to nodesSearched engine.set_on_update_full([&](const auto& i) { on_update_full(i, options["UCI_ShowWDL"]); }); } void UCIEngine::benchmark(std::istream& args) { // Probably not very important for a test this long, but include for completeness and sanity. static constexpr int NUM_WARMUP_POSITIONS = 3; std::string token; uint64_t nodes = 0, cnt = 1; uint64_t nodesSearched = 0; engine.set_on_update_full([&](const Engine::InfoFull& i) { nodesSearched = i.nodes; }); engine.set_on_iter([](const auto&) {}); engine.set_on_update_no_moves([](const auto&) {}); engine.set_on_bestmove([](const auto&, const auto&) {}); engine.set_on_verify_networks([](const auto&) {}); Benchmark::BenchmarkSetup setup = Benchmark::setup_benchmark(args); const auto numGoCommands = count_if(setup.commands.begin(), setup.commands.end(), [](const std::string& s) { return s.find("go ") == 0; }); TimePoint totalTime = 0; // Set options once at the start. 
auto ss = std::istringstream("name Threads value " + std::to_string(setup.threads)); setoption(ss); ss = std::istringstream("name Hash value " + std::to_string(setup.ttSize)); setoption(ss); ss = std::istringstream("name UCI_Chess960 value false"); setoption(ss); // Warmup for (const auto& cmd : setup.commands) { std::istringstream is(cmd); is >> std::skipws >> token; if (token == "go") { // One new line is produced by the search, so omit it here std::cerr << "\rWarmup position " << cnt++ << '/' << NUM_WARMUP_POSITIONS; Search::LimitsType limits = parse_limits(is); // Run with silenced network verification engine.go(limits); engine.wait_for_search_finished(); } else if (token == "position") position(is); else if (token == "ucinewgame") { engine.search_clear(); // search_clear may take a while } if (cnt > NUM_WARMUP_POSITIONS) break; } std::cerr << "\n"; cnt = 1; nodes = 0; int numHashfullReadings = 0; constexpr int hashfullAges[] = {0, 999}; // Only normal hashfull and touched hash. constexpr int hashfullAgeCount = std::size(hashfullAges); int totalHashfull[hashfullAgeCount] = {0}; int maxHashfull[hashfullAgeCount] = {0}; auto updateHashfullReadings = [&]() { numHashfullReadings += 1; for (int i = 0; i < hashfullAgeCount; ++i) { const int hashfull = engine.get_hashfull(hashfullAges[i]); maxHashfull[i] = std::max(maxHashfull[i], hashfull); totalHashfull[i] += hashfull; } }; engine.search_clear(); // search_clear may take a while for (const auto& cmd : setup.commands) { std::istringstream is(cmd); is >> std::skipws >> token; if (token == "go") { // One new line is produced by the search, so omit it here std::cerr << "\rPosition " << cnt++ << '/' << numGoCommands; Search::LimitsType limits = parse_limits(is); nodesSearched = 0; TimePoint elapsed = now(); // Run with silenced network verification engine.go(limits); engine.wait_for_search_finished(); totalTime += now() - elapsed; updateHashfullReadings(); nodes += nodesSearched; } else if (token == "position") 
position(is); else if (token == "ucinewgame") { engine.search_clear(); // search_clear may take a while } } totalTime = std::max(totalTime, 1); // Ensure positivity to avoid a 'divide by zero' dbg_print(); std::cerr << "\n"; static_assert( std::size(hashfullAges) == 2 && hashfullAges[0] == 0 && hashfullAges[1] == 999, "Hardcoded for display. Would complicate the code needlessly in the current state."); std::string threadBinding = engine.thread_binding_information_as_string(); if (threadBinding.empty()) threadBinding = "none"; // clang-format off std::cerr << "===========================" << "\nVersion : " << engine_version_info() // "\nCompiled by : " << compiler_info() << "Large pages : " << (has_large_pages() ? "yes" : "no") << "\nUser invocation : " << BenchmarkCommand << " " << setup.originalInvocation << "\nFilled invocation : " << BenchmarkCommand << " " << setup.filledInvocation << "\nAvailable processors : " << engine.get_numa_config_as_string() << "\nThread count : " << setup.threads << "\nThread binding : " << threadBinding << "\nTT size [MiB] : " << setup.ttSize << "\nHash max, avg [per mille] : " << "\n single search : " << maxHashfull[0] << ", " << totalHashfull[0] / numHashfullReadings << "\n single game : " << maxHashfull[1] << ", " << totalHashfull[1] / numHashfullReadings << "\nTotal nodes searched : " << nodes << "\nTotal search time [s] : " << totalTime / 1000.0 << "\nNodes/second : " << 1000 * nodes / totalTime << std::endl; // clang-format on init_search_update_listeners(); } void UCIEngine::setoption(std::istringstream& is) { engine.wait_for_search_finished(); engine.get_options().setoption(is); } std::uint64_t UCIEngine::perft(const Search::LimitsType& limits) { auto nodes = engine.perft(engine.fen(), limits.perft, engine.get_options()["UCI_Chess960"]); sync_cout << "\nNodes searched: " << nodes << "\n" << sync_endl; return nodes; } void UCIEngine::position(std::istringstream& is) { const std::string fullCommand = is.str(); std::string token, 
fen; is >> token; if (token == "startpos") { fen = StartFEN; is >> token; // Consume the "moves" token, if any } else if (token == "fen") while (is >> token && token != "moves") fen += token + " "; else return; std::vector moves; while (is >> token) { moves.push_back(token); } auto err = engine.set_position(fen, moves); if (err.has_value()) { terminate_on_critical_error(fullCommand, err->what()); } } namespace { struct WinRateParams { double a; double b; }; WinRateParams win_rate_params(const Position& pos) { int material = pos.count() + 3 * pos.count() + 3 * pos.count() + 5 * pos.count() + 9 * pos.count(); // The fitted model only uses data for material counts in [17, 78], and is anchored at count 58. double m = std::clamp(material, 17, 78) / 58.0; // Return a = p_a(material) and b = p_b(material), see github.com/official-stockfish/WDL_model constexpr double as[] = {-72.32565836, 185.93832038, -144.58862193, 416.44950446}; constexpr double bs[] = {83.86794042, -136.06112997, 69.98820887, 47.62901433}; double a = (((as[0] * m + as[1]) * m + as[2]) * m) + as[3]; double b = (((bs[0] * m + bs[1]) * m + bs[2]) * m) + bs[3]; return {a, b}; } // The win rate model is 1 / (1 + exp((a - eval) / b)), where a = p_a(material) and b = p_b(material). // It fits the LTC fishtest statistics rather accurately. int win_rate_model(Value v, const Position& pos) { auto [a, b] = win_rate_params(pos); // Return the win rate in per mille units, rounded to the nearest integer. return int(0.5 + 1000 / (1 + std::exp((a - double(v)) / b))); } } std::string UCIEngine::format_score(const Score& s) { constexpr int TB_CP = 20000; const auto format = overload{[](Score::Mate mate) -> std::string { auto m = (mate.plies > 0 ? (mate.plies + 1) : mate.plies) / 2; return std::string("mate ") + std::to_string(m); }, [](Score::Tablebase tb) -> std::string { return std::string("cp ") + std::to_string((tb.win ? 
TB_CP - tb.plies : -TB_CP - tb.plies)); }, [](Score::InternalUnits units) -> std::string { return std::string("cp ") + std::to_string(units.value); }}; return s.visit(format); } // Turns a Value to an integer centipawn number, // without treatment of mate and similar special scores. int UCIEngine::to_cp(Value v, const Position& pos) { // In general, the score can be defined via the WDL as // (log(1/L - 1) - log(1/W - 1)) / (log(1/L - 1) + log(1/W - 1)). // Based on our win_rate_model, this simply yields v / a. auto [a, b] = win_rate_params(pos); return int(std::round(100 * int(v) / a)); } std::string UCIEngine::wdl(Value v, const Position& pos) { std::stringstream ss; int wdl_w = win_rate_model(v, pos); int wdl_l = win_rate_model(-v, pos); int wdl_d = 1000 - wdl_w - wdl_l; ss << wdl_w << " " << wdl_d << " " << wdl_l; return ss.str(); } std::string UCIEngine::square(Square s) { return std::string{char('a' + file_of(s)), char('1' + rank_of(s))}; } std::string UCIEngine::move(Move m, bool chess960) { if (m == Move::none()) return "(none)"; if (m == Move::null()) return "0000"; Square from = m.from_sq(); Square to = m.to_sq(); if (m.type_of() == CASTLING && !chess960) to = make_square(to > from ? 
FILE_G : FILE_C, rank_of(from)); std::string move = square(from) + square(to); if (m.type_of() == PROMOTION) move += " pnbrqk"[m.promotion_type()]; return move; } std::string UCIEngine::to_lower(std::string str) { std::transform(str.begin(), str.end(), str.begin(), [](auto c) { return std::tolower(c); }); return str; } Move UCIEngine::to_move(const Position& pos, std::string str) { str = to_lower(str); for (const auto& m : MoveList(pos)) if (str == move(m, pos.is_chess960())) return m; return Move::none(); } void UCIEngine::on_update_no_moves(const Engine::InfoShort& info) { sync_cout << "info depth " << info.depth << " score " << format_score(info.score) << sync_endl; } void UCIEngine::on_update_full(const Engine::InfoFull& info, bool showWDL) { std::stringstream ss; ss << "info"; ss << " depth " << info.depth // << " seldepth " << info.selDepth // << " multipv " << info.multiPV // << " score " << format_score(info.score); // if (!info.bound.empty()) ss << " " << info.bound; if (showWDL) ss << " wdl " << info.wdl; ss << " nodes " << info.nodes // << " nps " << info.nps // << " hashfull " << info.hashfull // << " tbhits " << info.tbHits // << " time " << info.timeMs // << " pv " << info.pv; // sync_cout << ss.str() << sync_endl; } void UCIEngine::on_iter(const Engine::InfoIter& info) { std::stringstream ss; ss << "info"; ss << " depth " << info.depth // << " currmove " << info.currmove // << " currmovenumber " << info.currmovenumber; // sync_cout << ss.str() << sync_endl; } void UCIEngine::on_bestmove(std::string_view bestmove, std::string_view ponder) { sync_cout << "bestmove " << bestmove; if (!ponder.empty()) std::cout << " ponder " << ponder; std::cout << sync_endl; } void UCIEngine::terminate_on_critical_error(const std::string& fullCommand, const std::string& message) { sync_cout << "info string CRITICAL ERROR: Command `" << fullCommand << "` failed. 
Reason: " << message << '\n' << sync_endl; std::exit(1); } } // namespace Stockfish ================================================ FILE: src/uci.h ================================================ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. Stockfish is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #ifndef UCI_H_INCLUDED #define UCI_H_INCLUDED #include #include #include #include #include "engine.h" #include "misc.h" #include "search.h" namespace Stockfish { class Position; class Move; class Score; enum Square : uint8_t; using Value = int; class UCIEngine { public: UCIEngine(int argc, char** argv); void loop(); static int to_cp(Value v, const Position& pos); static std::string format_score(const Score& s); static std::string square(Square s); static std::string move(Move m, bool chess960); static std::string wdl(Value v, const Position& pos); static std::string to_lower(std::string str); static Move to_move(const Position& pos, std::string str); static Search::LimitsType parse_limits(std::istream& is); auto& engine_options() { return engine.get_options(); } private: Engine engine; CommandLine cli; static void print_info_string(std::string_view str); void go(std::istringstream& is); void bench(std::istream& args); void benchmark(std::istream& args); void position(std::istringstream& is); void setoption(std::istringstream& is); std::uint64_t perft(const 
Search::LimitsType&); static void on_update_no_moves(const Engine::InfoShort& info); static void on_update_full(const Engine::InfoFull& info, bool showWDL); static void on_iter(const Engine::InfoIter& info); static void on_bestmove(std::string_view bestmove, std::string_view ponder); void init_search_update_listeners(); [[noreturn]] void terminate_on_critical_error(const std::string& fullCommand, const std::string& message); }; } // namespace Stockfish #endif // #ifndef UCI_H_INCLUDED ================================================ FILE: src/ucioption.cpp ================================================ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. Stockfish is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . 
*/ #include "ucioption.h" #include #include #include #include #include #include #include #include "misc.h" namespace Stockfish { bool CaseInsensitiveLess::operator()(const std::string& s1, const std::string& s2) const { return std::lexicographical_compare( s1.begin(), s1.end(), s2.begin(), s2.end(), [](char c1, char c2) { return std::tolower(c1) < std::tolower(c2); }); } void OptionsMap::add_info_listener(InfoListener&& message_func) { info = std::move(message_func); } void OptionsMap::setoption(std::istringstream& is) { std::string token, name, value; is >> token; // Consume the "name" token // Read the option name (can contain spaces) while (is >> token && token != "value") name += (name.empty() ? "" : " ") + token; // Read the option value (can contain spaces) while (is >> token) value += (value.empty() ? "" : " ") + token; if (options_map.count(name)) options_map[name] = value; else sync_cout << "No such option: " << name << sync_endl; } const Option& OptionsMap::operator[](const std::string& name) const { auto it = options_map.find(name); assert(it != options_map.end()); return it->second; } // Inits options and assigns idx in the correct printing order void OptionsMap::add(const std::string& name, const Option& option) { if (!options_map.count(name)) { static size_t insert_order = 0; options_map[name] = option; options_map[name].parent = this; options_map[name].idx = insert_order++; } else { std::cerr << "Option \"" << name << "\" was already added!" << std::endl; std::exit(EXIT_FAILURE); } } std::size_t OptionsMap::count(const std::string& name) const { return options_map.count(name); } Option::Option(const OptionsMap* map) : parent(map) {} Option::Option(const char* v, OnChange f) : type("string"), min(0), max(0), on_change(std::move(f)) { defaultValue = currentValue = v; } Option::Option(bool v, OnChange f) : type("check"), min(0), max(0), on_change(std::move(f)) { defaultValue = currentValue = (v ? 
"true" : "false"); } Option::Option(OnChange f) : type("button"), min(0), max(0), on_change(std::move(f)) {} Option::Option(int v, int minv, int maxv, OnChange f) : type("spin"), min(minv), max(maxv), on_change(std::move(f)) { defaultValue = currentValue = std::to_string(v); } Option::Option(const char* v, const char* cur, OnChange f) : type("combo"), min(0), max(0), on_change(std::move(f)) { defaultValue = v; currentValue = cur; } Option::operator int() const { assert(type == "check" || type == "spin"); return (type == "spin" ? std::stoi(currentValue) : currentValue == "true"); } Option::operator std::string() const { assert(type == "string"); return currentValue; } bool Option::operator==(const char* s) const { assert(type == "combo"); return !CaseInsensitiveLess()(currentValue, s) && !CaseInsensitiveLess()(s, currentValue); } bool Option::operator!=(const char* s) const { return !(*this == s); } // Updates currentValue and triggers on_change() action. It's up to // the GUI to check for option's limits, but we could receive the new value // from the user by console window, so let's check the bounds anyway. Option& Option::operator=(const std::string& v) { assert(!type.empty()); if ((type != "button" && type != "string" && v.empty()) || (type == "check" && v != "true" && v != "false") || (type == "spin" && (std::stoi(v) < min || std::stoi(v) > max))) return *this; if (type == "combo") { OptionsMap comboMap; // To have case insensitive compare std::string token; std::istringstream ss(defaultValue); while (ss >> token) comboMap.add(token, Option()); if (!comboMap.count(v) || v == "var") return *this; } if (type == "string") currentValue = v == "" ? 
"" : v; else if (type != "button") currentValue = v; if (on_change) { const auto ret = on_change(*this); if (ret && parent != nullptr && parent->info != nullptr) parent->info(ret); } return *this; } std::ostream& operator<<(std::ostream& os, const OptionsMap& om) { for (size_t idx = 0; idx < om.options_map.size(); ++idx) for (const auto& it : om.options_map) if (it.second.idx == idx) { const Option& o = it.second; os << "\noption name " << it.first << " type " << o.type; if (o.type == "check" || o.type == "combo") os << " default " << o.defaultValue; else if (o.type == "string") { std::string defaultValue = o.defaultValue.empty() ? "" : o.defaultValue; os << " default " << defaultValue; } else if (o.type == "spin") os << " default " << stoi(o.defaultValue) << " min " << o.min << " max " << o.max; break; } return os; } } ================================================ FILE: src/ucioption.h ================================================ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 Copyright (C) 2004-2026 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. Stockfish is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . 
*/ #ifndef UCIOPTION_H_INCLUDED #define UCIOPTION_H_INCLUDED #include #include #include #include #include #include namespace Stockfish { // Define a custom comparator, because the UCI options should be case-insensitive struct CaseInsensitiveLess { bool operator()(const std::string&, const std::string&) const; }; class OptionsMap; // The Option class implements each option as specified by the UCI protocol class Option { public: using OnChange = std::function(const Option&)>; Option(const OptionsMap*); Option(OnChange = nullptr); Option(bool v, OnChange = nullptr); Option(const char* v, OnChange = nullptr); Option(int v, int minv, int maxv, OnChange = nullptr); Option(const char* v, const char* cur, OnChange = nullptr); Option& operator=(const std::string&); operator int() const; operator std::string() const; bool operator==(const char*) const; bool operator!=(const char*) const; friend std::ostream& operator<<(std::ostream&, const OptionsMap&); int operator<<(const Option&) = delete; private: friend class OptionsMap; friend class Engine; friend class Tune; std::string defaultValue, currentValue, type; int min, max; size_t idx; OnChange on_change; const OptionsMap* parent = nullptr; }; class OptionsMap { public: using InfoListener = std::function)>; OptionsMap() = default; OptionsMap(const OptionsMap&) = delete; OptionsMap(OptionsMap&&) = delete; OptionsMap& operator=(const OptionsMap&) = delete; OptionsMap& operator=(OptionsMap&&) = delete; void add_info_listener(InfoListener&&); void setoption(std::istringstream&); const Option& operator[](const std::string&) const; void add(const std::string&, const Option& option); std::size_t count(const std::string&) const; private: friend class Engine; friend class Option; friend std::ostream& operator<<(std::ostream&, const OptionsMap&); // The options container is defined as a std::map using OptionsStore = std::map; OptionsStore options_map; InfoListener info; }; } #endif // #ifndef UCIOPTION_H_INCLUDED 
================================================ FILE: tests/.gitattributes ================================================ *.sh text eol=lf ================================================ FILE: tests/instrumented.py ================================================ import argparse import re import sys import subprocess import pathlib import os import fnmatch from testing import ( EPD, TSAN, Stockfish as Engine, MiniTestFramework, OrderedClassMembers, Valgrind, Syzygy, ) PATH = pathlib.Path(__file__).parent.resolve() CWD = os.getcwd() def get_prefix(): if args.valgrind: return Valgrind.get_valgrind_command() if args.valgrind_thread: return Valgrind.get_valgrind_thread_command() return [] def get_threads(): if args.valgrind_thread or args.sanitizer_thread: return 2 return 1 def get_path(): return os.path.abspath(os.path.join(CWD, args.stockfish_path)) def postfix_check(output): if args.sanitizer_undefined: for idx, line in enumerate(output): if "runtime error:" in line: # print next possible 50 lines for i in range(50): debug_idx = idx + i if debug_idx < len(output): print(output[debug_idx]) return False if args.sanitizer_thread: for idx, line in enumerate(output): if "WARNING: ThreadSanitizer:" in line: # print next possible 50 lines for i in range(50): debug_idx = idx + i if debug_idx < len(output): print(output[debug_idx]) return False return True def Stockfish(*args, **kwargs): return Engine(get_prefix(), get_path(), *args, **kwargs) class TestCLI(metaclass=OrderedClassMembers): def beforeAll(self): pass def afterAll(self): pass def beforeEach(self): self.stockfish = None def afterEach(self): assert postfix_check(self.stockfish.get_output()) == True self.stockfish.clear_output() def test_eval(self): self.stockfish = Stockfish("eval".split(" "), True) assert self.stockfish.process.returncode == 0 def test_go_nodes_1000(self): self.stockfish = Stockfish("go nodes 1000".split(" "), True) assert self.stockfish.process.returncode == 0 def test_go_depth_10(self): 
self.stockfish = Stockfish("go depth 10".split(" "), True) assert self.stockfish.process.returncode == 0 def test_go_perft_4(self): self.stockfish = Stockfish("go perft 4".split(" "), True) assert self.stockfish.process.returncode == 0 def test_go_movetime_1000(self): self.stockfish = Stockfish("go movetime 1000".split(" "), True) assert self.stockfish.process.returncode == 0 def test_go_wtime_8000_btime_8000_winc_500_binc_500(self): self.stockfish = Stockfish( "go wtime 8000 btime 8000 winc 500 binc 500".split(" "), True, ) assert self.stockfish.process.returncode == 0 def test_go_wtime_1000_btime_1000_winc_0_binc_0(self): self.stockfish = Stockfish( "go wtime 1000 btime 1000 winc 0 binc 0".split(" "), True, ) assert self.stockfish.process.returncode == 0 def test_go_wtime_1000_btime_1000_winc_0_binc_0_movestogo_5(self): self.stockfish = Stockfish( "go wtime 1000 btime 1000 winc 0 binc 0 movestogo 5".split(" "), True, ) assert self.stockfish.process.returncode == 0 def test_go_movetime_200(self): self.stockfish = Stockfish("go movetime 200".split(" "), True) assert self.stockfish.process.returncode == 0 def test_go_nodes_20000_searchmoves_e2e4_d2d4(self): self.stockfish = Stockfish( "go nodes 20000 searchmoves e2e4 d2d4".split(" "), True ) assert self.stockfish.process.returncode == 0 def test_bench_128_threads_8_default_depth(self): self.stockfish = Stockfish( f"bench 128 {get_threads()} 8 default depth".split(" "), True, ) assert self.stockfish.process.returncode == 0 def test_bench_128_threads_3_bench_tmp_epd_depth(self): self.stockfish = Stockfish( f"bench 128 {get_threads()} 3 {os.path.join(PATH, 'bench_tmp.epd')} depth".split( " " ), True, ) assert self.stockfish.process.returncode == 0 def test_d(self): self.stockfish = Stockfish("d".split(" "), True) assert self.stockfish.process.returncode == 0 def test_compiler(self): self.stockfish = Stockfish("compiler".split(" "), True) assert self.stockfish.process.returncode == 0 def test_license(self): 
self.stockfish = Stockfish("license".split(" "), True) assert self.stockfish.process.returncode == 0 def test_uci(self): self.stockfish = Stockfish("uci".split(" "), True) assert self.stockfish.process.returncode == 0 def test_export_net_verify_nnue(self): current_path = os.path.abspath(os.getcwd()) self.stockfish = Stockfish( f"export_net {os.path.join(current_path, 'verify.nnue')}".split(" "), True ) assert self.stockfish.process.returncode == 0 # verify the generated net equals the base net def test_network_equals_base(self): self.stockfish = Stockfish( ["uci"], True, ) output = self.stockfish.process.stdout # find line for line in output.split("\n"): if "option name EvalFile type string default" in line: network = line.split(" ")[-1] break # find network file in src dir network = os.path.join(PATH.parent.resolve(), "src", network) if not os.path.exists(network): print( f"Network file {network} not found, please download the network file over the make command." ) assert False diff = subprocess.run(["diff", network, f"verify.nnue"]) assert diff.returncode == 0 class TestInteractive(metaclass=OrderedClassMembers): def beforeAll(self): self.stockfish = Stockfish() def afterAll(self): self.stockfish.quit() assert self.stockfish.close() == 0 def afterEach(self): assert postfix_check(self.stockfish.get_output()) == True self.stockfish.clear_output() def test_startup_output(self): self.stockfish.starts_with("Stockfish") def test_uci_command(self): self.stockfish.send_command("uci") self.stockfish.equals("uciok") def test_set_threads_option(self): self.stockfish.send_command(f"setoption name Threads value {get_threads()}") def test_ucinewgame_and_startpos_nodes_1000(self): self.stockfish.send_command("ucinewgame") self.stockfish.send_command("position startpos") self.stockfish.send_command("go nodes 1000") self.stockfish.starts_with("bestmove") def test_ucinewgame_and_startpos_moves(self): self.stockfish.send_command("ucinewgame") self.stockfish.send_command("position 
startpos moves e2e4 e7e6") self.stockfish.send_command("go nodes 1000") self.stockfish.starts_with("bestmove") def test_fen_position_1(self): self.stockfish.send_command("ucinewgame") self.stockfish.send_command("position fen 5rk1/1K4p1/8/8/3B4/8/8/8 b - - 0 1") self.stockfish.send_command("go nodes 1000") self.stockfish.starts_with("bestmove") def test_fen_position_2_flip(self): self.stockfish.send_command("ucinewgame") self.stockfish.send_command("position fen 5rk1/1K4p1/8/8/3B4/8/8/8 b - - 0 1") self.stockfish.send_command("flip") self.stockfish.send_command("go nodes 1000") self.stockfish.starts_with("bestmove") def test_depth_5_with_callback(self): self.stockfish.send_command("ucinewgame") self.stockfish.send_command("position startpos") self.stockfish.send_command("go depth 5") def callback(output): regex = r"info depth \d+ seldepth \d+ multipv \d+ score cp -?\d+ nodes \d+ nps \d+ hashfull \d+ tbhits \d+ time \d+ pv" if output.startswith("info depth") and not re.match(regex, output): assert False if output.startswith("bestmove"): return True return False self.stockfish.check_output(callback) def test_ucinewgame_and_go_depth_9(self): self.stockfish.send_command("ucinewgame") self.stockfish.send_command("setoption name UCI_ShowWDL value true") self.stockfish.send_command("position startpos") self.stockfish.send_command("go depth 9") depth = 1 def callback(output): nonlocal depth regex = rf"info depth {depth} seldepth \d+ multipv \d+ score cp -?\d+ wdl \d+ \d+ \d+ nodes \d+ nps \d+ hashfull \d+ tbhits \d+ time \d+ pv" if output.startswith("info depth"): if not re.match(regex, output): assert False depth += 1 if output.startswith("bestmove"): assert depth == 10 return True return False self.stockfish.check_output(callback) def test_clear_hash(self): self.stockfish.send_command("setoption name Clear Hash") def test_fen_position_mate_1(self): self.stockfish.send_command("ucinewgame") self.stockfish.send_command( "position fen 5K2/8/2qk4/2nPp3/3r4/6B1/B7/3R4 w - e6" 
) self.stockfish.send_command("go depth 18") self.stockfish.expect("* score mate 1 * pv d5e6") self.stockfish.equals("bestmove d5e6") def test_fen_position_mate_minus_1(self): self.stockfish.send_command("ucinewgame") self.stockfish.send_command( "position fen 2brrb2/8/p7/Q7/1p1kpPp1/1P1pN1K1/3P4/8 b - -" ) self.stockfish.send_command("go depth 18") self.stockfish.expect("* score mate -1 *") self.stockfish.starts_with("bestmove") def test_fen_position_fixed_node(self): self.stockfish.send_command("ucinewgame") self.stockfish.send_command( "position fen 5K2/8/2P1P1Pk/6pP/3p2P1/1P6/3P4/8 w - - 0 1" ) self.stockfish.send_command("go nodes 500000") self.stockfish.starts_with("bestmove") def test_fen_position_with_mate_go_depth(self): self.stockfish.send_command("ucinewgame") self.stockfish.send_command( "position fen 8/5R2/2K1P3/4k3/8/b1PPpp1B/5p2/8 w - -" ) self.stockfish.send_command("go depth 18 searchmoves c6d7") self.stockfish.expect("* score mate 2 * pv c6d7 * f7f5") self.stockfish.starts_with("bestmove") def test_fen_position_with_mate_go_mate(self): self.stockfish.send_command("ucinewgame") self.stockfish.send_command( "position fen 8/5R2/2K1P3/4k3/8/b1PPpp1B/5p2/8 w - -" ) self.stockfish.send_command("go mate 2 searchmoves c6d7") self.stockfish.expect("* score mate 2 * pv c6d7 *") self.stockfish.starts_with("bestmove") def test_fen_position_with_mate_go_nodes(self): self.stockfish.send_command("ucinewgame") self.stockfish.send_command( "position fen 8/5R2/2K1P3/4k3/8/b1PPpp1B/5p2/8 w - -" ) self.stockfish.send_command("go nodes 500000 searchmoves c6d7") self.stockfish.expect("* score mate 2 * pv c6d7 * f7f5") self.stockfish.starts_with("bestmove") def test_fen_position_depth_27(self): self.stockfish.send_command("ucinewgame") self.stockfish.send_command( "position fen r1b2r1k/pp1p2pp/2p5/2B1q3/8/8/P1PN2PP/R4RK1 w - - 0 18" ) self.stockfish.send_command("go") self.stockfish.contains("score mate 1") self.stockfish.starts_with("bestmove") def 
test_fen_position_with_mate_go_depth_and_promotion(self): self.stockfish.send_command("ucinewgame") self.stockfish.send_command( "position fen 8/5R2/2K1P3/4k3/8/b1PPpp1B/5p2/8 w - - moves c6d7 f2f1q" ) self.stockfish.send_command("go depth 18") self.stockfish.expect("* score mate 1 * pv f7f5") self.stockfish.starts_with("bestmove f7f5") def test_fen_position_with_mate_go_depth_and_searchmoves(self): self.stockfish.send_command("ucinewgame") self.stockfish.send_command( "position fen 8/5R2/2K1P3/4k3/8/b1PPpp1B/5p2/8 w - -" ) self.stockfish.send_command("go depth 18 searchmoves c6d7") self.stockfish.expect("* score mate 2 * pv c6d7 * f7f5") self.stockfish.starts_with("bestmove c6d7") def test_fen_position_with_moves_with_mate_go_depth_and_searchmoves(self): self.stockfish.send_command("ucinewgame") self.stockfish.send_command( "position fen 8/5R2/2K1P3/4k3/8/b1PPpp1B/5p2/8 w - - moves c6d7" ) self.stockfish.send_command("go depth 18 searchmoves e3e2") self.stockfish.expect("* score mate -1 * pv e3e2 f7f5") self.stockfish.starts_with("bestmove e3e2") def test_verify_nnue_network(self): current_path = os.path.abspath(os.getcwd()) Stockfish( f"export_net {os.path.join(current_path, 'verify.nnue')}".split(" "), True ) self.stockfish.send_command("setoption name EvalFile value verify.nnue") self.stockfish.send_command("position startpos") self.stockfish.send_command("go depth 5") self.stockfish.starts_with("bestmove") def test_multipv_setting(self): self.stockfish.send_command("setoption name MultiPV value 4") self.stockfish.send_command("position startpos") self.stockfish.send_command("go depth 5") self.stockfish.starts_with("bestmove") def test_fen_position_with_skill_level(self): self.stockfish.send_command("setoption name Skill Level value 10") self.stockfish.send_command("position startpos") self.stockfish.send_command("go depth 5") self.stockfish.starts_with("bestmove") self.stockfish.send_command("setoption name Skill Level value 20") class 
class TestEnPassantSanitization(metaclass=OrderedClassMembers):
    """Suite checking the FEN the engine prints back after loading a position.

    Each ``test_position_N`` case sends a ``position fen ...`` command followed
    by ``d`` and compares the printed ``Fen`` line with a pattern.  In some
    cases the expected FEN repeats the en passant square that was sent, in
    others the square is expected to come back as ``-``.
    """

    def beforeAll(self):
        # One engine instance is shared by every test of the suite.
        self.stockfish = Stockfish()

    def afterAll(self):
        engine = self.stockfish
        engine.quit()
        exit_code = engine.close()
        assert exit_code == 0

    def afterEach(self):
        captured = self.stockfish.get_output()
        assert postfix_check(captured) == True
        self.stockfish.clear_output()

    def _load_and_expect_fen(self, position_command, fen_pattern):
        """Send ``position_command`` and ``d``; the ``Fen`` line must match ``fen_pattern``."""
        engine = self.stockfish
        engine.send_command(position_command)
        engine.send_command("d")
        engine.expect_for_line_matching("Fen*", fen_pattern)

    def test_position_1(self):
        self._load_and_expect_fen(
            "position fen rnbqkbnr/ppp1p1pp/5p2/3pP3/8/8/PPPP1PPP/RNBQKBNR w kq d6 0 3",
            "*rnbqkbnr/ppp1p1pp/5p2/3pP3/8/8/PPPP1PPP/RNBQKBNR w kq d6 0 3*",
        )

    def test_position_2(self):
        self._load_and_expect_fen(
            "position fen k7/8/8/1pP5/2K5/8/8/8 w - b6 0 1",
            "*k7/8/8/1pP5/2K5/8/8/8 w - b6 0 1*",
        )

    def test_position_3(self):
        self._load_and_expect_fen(
            "position fen k1r5/8/8/1pP5/2K5/8/8/8 w - b6 0 1",
            "*k1r5/8/8/1pP5/2K5/8/8/8 w - - 0 1*",
        )

    def test_position_4(self):
        self._load_and_expect_fen(
            "position fen k1r5/8/8/1pP5/8/2K5/8/8 w - b6 0 1",
            "*k1r5/8/8/1pP5/8/2K5/8/8 w - - 0 1*",
        )

    def test_position_5(self):
        self._load_and_expect_fen(
            "position fen k1r5/8/8/PpP5/8/2K5/8/8 w - b6 0 1",
            "*k1r5/8/8/PpP5/8/2K5/8/8 w - b6 0 1*",
        )

    def test_position_6(self):
        self._load_and_expect_fen(
            "position fen k1r5/8/8/PpP5/2K5/8/8/8 w - b6 0 1",
            "*k1r5/8/8/PpP5/2K5/8/8/8 w - b6 0 1*",
        )

    def test_position_7(self):
        self._load_and_expect_fen(
            "position fen k7/4b3/8/PpP5/1K6/8/8/8 w - b6 0 1",
            "*k7/4b3/8/PpP5/1K6/8/8/8 w - b6 0 1*",
        )

    def test_position_8(self):
        self._load_and_expect_fen(
            "position fen k7/b5b1/8/2PpP3/3K4/8/8/8 w - d6 0 1",
            "*k7/b5b1/8/2PpP3/3K4/8/8/8 w - - 0 1*",
        )

    def test_position_9(self):
        self._load_and_expect_fen(
            "position fen k7/8/8/r2pPK2/8/8/8/8 w - d6 0 1",
            "*k7/8/8/r2pPK2/8/8/8/8 w - - 0 1*",
        )

    def test_position_10(self):
        self._load_and_expect_fen(
            "position fen k7/8/8/r1PpPK2/8/8/8/8 w - d6 0 1",
            "*k7/8/8/r1PpPK2/8/8/8/8 w - d6 0 1*",
        )

    def test_position_11(self):
        self._load_and_expect_fen(
            "position fen kb6/8/8/3pP3/5K2/8/8/8 w - d6 0 1",
            "*kb6/8/8/3pP3/5K2/8/8/8 w - d6 0 1*",
        )

    def test_position_find_draw(self):
        """Search 10000 nodes and expect a ``score cp 0`` line whose pv, and the bestmove, start with d8d7."""
        engine = self.stockfish
        engine.send_command(
            "position fen q4kb1/3Q2nq/8/r3PpK1/2n5/7q/8/q7 w - f6 0 1 moves d7c8 f8f7 c8d7 f7f8 d7d8 f8f7"
        )
        engine.send_command("go nodes 10000")

        def check_output(output):
            # True stops the wait; None keeps reading further lines.
            return True if fnmatch.fnmatch(output, "* score cp 0 * pv d8d7*") else None

        engine.check_output(check_output)
        engine.expect("bestmove d8d7*")
# Run one perft case through the generated expect script and report the result.
#   $1  argument for the UCI "position" command ("startpos" or "fen ...")
#   $2  perft depth
#   $3  expected node count
#   $4  "true" to switch on UCI_Chess960 before searching
# Prints OK, or FAILED followed by the captured log. A failure only sets the
# global TESTS_FAILED=1, so the remaining positions are still checked.
run_test() {
  local pos="$1"
  local depth="$2"
  local expected="$3"
  local chess960="$4"
  local tmp_file
  local exit_code

  # Declared above and assigned here: "local var=$(cmd)" would report the
  # status of "local" and hide a failing mktemp.
  if ! tmp_file=$(mktemp); then
    echo "FAILED (could not create a temporary log file)"
    TESTS_FAILED=1
    return 0
  fi

  echo -n "Testing depth $depth: ${pos:0:40}... "

  # Quoted so a temporary path containing whitespace is still run as one word.
  if "$EXPECT_SCRIPT" "$pos" "$depth" "$expected" "$chess960" "$tmp_file" > /dev/null 2>&1; then
    echo "OK"
    rm -f "$tmp_file"
  else
    # $? still holds the exit status of the expect script here.
    exit_code=$?
    echo "FAILED (exit code: $exit_code)"
    echo "===== Output for failed test ====="
    cat "$tmp_file"
    echo "=================================="
    rm -f "$tmp_file"
    TESTS_FAILED=1
  fi
}
#!/bin/bash
# verify reproducible search

# Let every stage of the check pipeline count: without pipefail only awk's
# status is seen, so a non-zero exit from expect, or grep finding no
# "nodes" output at all, would still end in "reprosearch testing OK".
set -o pipefail

error()
{
  echo "reprosearch testing failed on line $1"
  exit 1
}
trap 'error ${LINENO}' ERR

echo "reprosearch testing started"

# repeat two short games, separated by ucinewgame.
# with go nodes $nodes they should result in exactly
# the same node count for each iteration.
cat << EOF > repeat.exp
set timeout 10
spawn ./stockfish
lassign \$argv nodes

send "uci\n"
expect "uciok"

send "ucinewgame\n"
send "position startpos\n"
send "go nodes \$nodes\n"
expect "bestmove"

send "position startpos moves e2e4 e7e6\n"
send "go nodes \$nodes\n"
expect "bestmove"

send "ucinewgame\n"
send "position startpos\n"
send "go nodes \$nodes\n"
expect "bestmove"

send "position startpos moves e2e4 e7e6\n"
send "go nodes \$nodes\n"
expect "bestmove"

send "quit\n"
expect eof
EOF

# to increase the likelihood of finding a non-reproducible case,
# the allowed number of nodes are varied systematically
for i in $(seq 1 20)
do

  nodes=$((100*3**i/2**i))
  echo "reprosearch testing with $nodes nodes"

  # each line should appear exactly an even number of times
  expect repeat.exp "$nodes" 2>&1 | grep -o "nodes [0-9]*" | sort | uniq -c | awk '{if ($1%2!=0) exit(1)}'

done

rm repeat.exp

echo "reprosearch testing OK"
class TSAN:
    """Creates and removes the ``tsan.supp`` suppression file and ``TSAN_OPTIONS``."""

    @staticmethod
    def set_tsan_option():
        """Write ``tsan.supp`` to the current directory and point ``TSAN_OPTIONS`` at it.

        The file lists ``race:`` entries for the transposition table functions
        named below.
        """
        with open("tsan.supp", "w") as f:
            f.write(
                """
race:Stockfish::TTEntry::read
race:Stockfish::TTEntry::save
race:Stockfish::TranspositionTable::probe
race:Stockfish::TranspositionTable::hashfull
"""
            )

        os.environ["TSAN_OPTIONS"] = "suppressions=./tsan.supp"

    @staticmethod
    def unset_tsan_option():
        """Drop ``TSAN_OPTIONS`` and delete ``tsan.supp`` from the current directory.

        Safe to call when the variable or the file is already gone, so that a
        repeated or out-of-order teardown does not raise.
        """
        os.environ.pop("TSAN_OPTIONS", None)
        try:
            os.remove("tsan.supp")
        except FileNotFoundError:
            # Nothing left to clean up.
            pass
class TimeoutException(Exception):
    """Raised by ``timeout_decorator`` when the wrapped call exceeds its limit.

    Attributes:
        message: human readable description of the timeout.
        timeout: the limit, in seconds, that was exceeded.
    """

    def __init__(self, message: str, timeout: int):
        # Hand the message to Exception so str(exc) is the message itself
        # instead of the repr of the (message, timeout) argument tuple.
        super().__init__(message)
        self.message = message
        self.timeout = timeout


class UnexpectedOutputException(Exception):
    """Signals that a line was read which does not match the expected pattern.

    Attributes:
        actual: the line that was read.
        expected: the pattern the line was supposed to match.
    """

    def __init__(self, actual: str, expected: str):
        super().__init__(
            f'unexpected output "{actual}", expected output matching "{expected}"'
        )
        self.actual = actual
        self.expected = expected


def timeout_decorator(timeout: float):
    """Return a decorator limiting each call of the wrapped function to ``timeout`` seconds.

    The call runs in a worker thread.  Its return value is passed through and
    an exception it raises propagates to the caller.  If no result is
    available after ``timeout`` seconds a ``TimeoutException`` is raised
    immediately.

    The worker thread cannot be cancelled: after a timeout the call keeps
    running in the background until it returns on its own, and the
    interpreter will not exit before it has finished.
    """

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)
            future = executor.submit(func, *args, **kwargs)
            try:
                return future.result(timeout=timeout)
            except concurrent.futures.TimeoutError:
                raise TimeoutException(
                    f"Function {func.__name__} timed out after {timeout} seconds",
                    timeout,
                ) from None
            finally:
                # Never wait here: leaving a ``with`` block would join the
                # worker and delay the TimeoutException until the call ends.
                executor.shutdown(wait=False)

        return wrapper

    return decorator
def __print_buffer_output(self, buffer: io.StringIO):
    """Print the text captured in ``buffer`` between two OUTPUT rulers.

    Nothing is printed when the buffer is empty.  Every captured line is
    prefixed with a space and the whole block is wrapped in GRAY_COLOR.
    """
    captured = buffer.getvalue()
    if not captured:
        return

    ruler = f" {RED_COLOR}⎯⎯⎯⎯⎯OUTPUT⎯⎯⎯⎯⎯{RESET_COLOR}"
    prefixed_lines = [f" {text}" for text in captured.splitlines()]
    indented_block = "\n".join(prefixed_lines)

    print(ruler)
    print(f"{GRAY_COLOR}{indented_block}{RESET_COLOR}")
    print(ruler)
class Stockfish:
    """Runs the engine binary, either as a one-shot command or as an interactive process.

    With ``cli=True`` the binary is run to completion with ``subprocess.run``
    and ``self.process`` is the resulting ``CompletedProcess``.  Otherwise it
    is started with ``subprocess.Popen`` (stderr merged into stdout) and is
    driven line by line through ``send_command`` and the matching helpers.
    Each matching helper is limited to ``MAX_TIMEOUT`` seconds by
    ``timeout_decorator``.
    """

    def __init__(
        self,
        prefix: List[str],
        path: str,
        args: "List[str] | None" = None,
        cli: bool = False,
    ):
        """Store the command line pieces and start the process.

        Args:
            prefix: words placed in front of the binary path on the command line.
            path: path of the engine binary.
            args: extra arguments placed after the binary path; ``None`` means none.
            cli: run to completion instead of starting an interactive process.
        """
        self.path = path
        self.process = None
        # A fresh list per instance: a shared ``[]`` default would be the same
        # object for every instance created without ``args``.
        self.args = [] if args is None else args
        self.cli = cli
        self.prefix = prefix
        self.output = []

        self.start()

    def _check_process_alive(self):
        """Raise ``RuntimeError`` (after printing the collected output) if the process is gone."""
        if not self.process or self.process.poll() is not None:
            print("\n".join(self.output))
            raise RuntimeError("Stockfish process has terminated")

    def start(self):
        """Launch the binary in the mode selected by ``self.cli``."""
        command = self.prefix + [self.path] + self.args

        if self.cli:
            self.process = subprocess.run(
                command,
                capture_output=True,
                text=True,
            )

            if self.process.returncode != 0:
                print(self.process.stdout)
                print(self.process.stderr)
                print(f"Process failed with return code {self.process.returncode}")

            # A one-shot run never starts the interactive process below.
            return

        self.process = subprocess.Popen(
            command,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            universal_newlines=True,
            bufsize=1,
        )

    def setoption(self, name: str, value: str):
        """Send ``setoption name <name> value <value>``."""
        self.send_command(f"setoption name {name} value {value}")

    def send_command(self, command: str):
        """Write ``command`` plus a newline to the process and flush.

        Raises:
            RuntimeError: if the process was never started or has terminated.
        """
        if not self.process:
            raise RuntimeError("Stockfish process is not started")

        self._check_process_alive()

        self.process.stdin.write(command + "\n")
        self.process.stdin.flush()

    @timeout_decorator(MAX_TIMEOUT)
    def equals(self, expected_output: str):
        """Read lines until one is exactly ``expected_output``."""
        for line in self.readline():
            if line == expected_output:
                return

    @timeout_decorator(MAX_TIMEOUT)
    def expect(self, expected_output: str):
        """Read lines until one matches the ``fnmatch`` pattern ``expected_output``."""
        for line in self.readline():
            if fnmatch.fnmatch(line, expected_output):
                return

    @timeout_decorator(MAX_TIMEOUT)
    def contains(self, expected_output: str):
        """Read lines until one contains ``expected_output``."""
        for line in self.readline():
            if expected_output in line:
                return

    @timeout_decorator(MAX_TIMEOUT)
    def starts_with(self, expected_output: str):
        """Read lines until one starts with ``expected_output``."""
        for line in self.readline():
            if line.startswith(expected_output):
                return

    @timeout_decorator(MAX_TIMEOUT)
    def check_output(self, callback):
        """Read lines until ``callback(line)`` compares equal to ``True``.

        Raises:
            ValueError: if no callback is given.
        """
        if not callback:
            raise ValueError("Callback function is required")

        for line in self.readline():
            # Kept as an equality test: callbacks return True or None, and
            # other truthy values must keep not ending the wait.
            if callback(line) == True:
                return

    @timeout_decorator(MAX_TIMEOUT)
    def expect_for_line_matching(self, line_match: str, expected: str):
        """Find the first line matching ``line_match`` and require it to match ``expected``.

        Raises:
            UnexpectedOutputException: if that line does not match ``expected``.
        """
        for line in self.readline():
            if not fnmatch.fnmatch(line, line_match):
                continue
            if fnmatch.fnmatch(line, expected):
                break
            raise UnexpectedOutputException(line, expected)

    def readline(self):
        """Yield stripped output lines without end, recording each in ``self.output``.

        Raises:
            RuntimeError: if the process was never started, or once it is
                found terminated before a read.
        """
        if not self.process:
            raise RuntimeError("Stockfish process is not started")

        while True:
            self._check_process_alive()
            line = self.process.stdout.readline().strip()
            self.output.append(line)

            yield line

    def clear_output(self):
        """Forget all lines recorded so far."""
        self.output = []

    def get_output(self) -> List[str]:
        """Return the list of lines recorded so far."""
        return self.output

    def quit(self):
        """Send the ``quit`` command."""
        self.send_command("quit")

    def close(self):
        """Release the process and return its exit code (0 if none was started)."""
        if not self.process:
            return 0

        if self.cli:
            # subprocess.run() has already waited, and the CompletedProcess it
            # returns has neither pipes to close nor a wait() method.
            return self.process.returncode

        self.process.stdin.close()
        self.process.stdout.close()
        return self.process.wait()