Repository: hajimes/mmh3 Branch: master Commit: fd45e5ef5078 Files: 49 Total size: 283.8 KB Directory structure: gitextract_gkc_80ex/ ├── .clang-format ├── .github/ │ ├── actionlint.yml │ ├── dependabot.yml │ └── workflows/ │ ├── benchmark-base-hash.yml │ ├── benchmark.yml │ ├── build.yml │ ├── draft-pdf.yml │ ├── superlinter.yml │ └── wheels.yml ├── .gitignore ├── .gitmodules ├── .markdown-lint.yml ├── .readthedocs.yml ├── CHANGELOG.md ├── LICENSE ├── README.md ├── benchmark/ │ ├── benchmark.py │ ├── generate_table.py │ ├── plot_graph.py │ └── plot_graph_base_hash.py ├── docs/ │ ├── CODE_OF_CONDUCT.md │ ├── CONTRIBUTING.md │ ├── CONTRIBUTORS.md │ ├── Makefile │ ├── api.md │ ├── benchmark.md │ ├── changelog.md │ ├── conf.py │ ├── index.rst │ ├── make.bat │ └── quickstart.md ├── paper/ │ ├── paper.bib │ └── paper.md ├── pyproject.toml ├── src/ │ └── mmh3/ │ ├── __init__.pyi │ ├── hashlib.h │ ├── mmh3module.c │ ├── murmurhash3.c │ ├── murmurhash3.h │ └── py.typed ├── tests/ │ ├── helper.py │ ├── test_doctrings.py │ ├── test_free_threading.py │ ├── test_invalid_inputs.py │ ├── test_mmh3.py │ └── test_mmh3_hasher.py ├── tox.ini └── util/ ├── FILE_HEADER └── refresh.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .clang-format ================================================ # This .clang-format was originally written by Paul Ganssle # Its exact license is unknown but compatible with MIT # https://gist.github.com/pganssle/0e3a5f828b4d07d79447f6ced8e7e4db # A clang-format style that approximates Python's PEP 7 # Useful for IDE integration BasedOnStyle: Google AlwaysBreakAfterReturnType: All AllowShortIfStatementsOnASingleLine: false AlignAfterOpenBracket: Align BreakBeforeBraces: Stroustrup ColumnLimit: 79 DerivePointerAlignment: false IndentWidth: 4 Language: Cpp PointerAlignment: Right ReflowComments: true SpaceBeforeParens: ControlStatements 
SpacesInParentheses: false TabWidth: 4 UseTab: Never ================================================ FILE: .github/actionlint.yml ================================================ # As of March 5, 2026, actionlint via super-linter 8.5.0 does not support macOS 26, so we ignore the runner-label warning for now. paths: .github/workflows/**/*.{yml,yaml}: ignore: - 'label "macos-26" is unknown.+' - 'label "macos-26-intel" is unknown.+' ================================================ FILE: .github/dependabot.yml ================================================ version: 2 updates: - package-ecosystem: "github-actions" directory: "/" schedule: interval: "weekly" day: "monday" open-pull-requests-limit: 5 groups: dependencies: patterns: - "*" cooldown: default-days: 7 commit-message: prefix: "chore(ci)" - package-ecosystem: "pip" directory: "/" schedule: interval: "weekly" day: "monday" open-pull-requests-limit: 5 groups: dependencies: patterns: - "*" cooldown: default-days: 7 commit-message: prefix: "chore" include: "scope" ================================================ FILE: .github/workflows/benchmark-base-hash.yml ================================================ --- name: Benchmark Base Hash on: workflow_dispatch: permissions: {} jobs: benchmark: permissions: contents: read packages: read runs-on: ubuntu-24.04 env: BENCHMARK_MAX_SIZE: 65536 steps: - name: Checkout uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: persist-credentials: false - name: Set up Python uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: python-version: "3.14" - name: Install dependencies run: | pip install --upgrade pip pip install . pip install ".[benchmark]" - name: Tune the system for benchmarking run: | echo "Running \"lscpu -a -e\"..." lscpu -a -e echo -n "Checking randomize_va_space: " cat /proc/sys/kernel/randomize_va_space echo "randomize_va_space should be 2, meaning ASLR is fully enabled." 
systemctl status irqbalance echo "Stopping irqbalance..." sudo systemctl stop irqbalance echo -n "Checking default_smp_affinity: " cat /proc/irq/default_smp_affinity echo 3 | sudo tee /proc/irq/default_smp_affinity > /dev/null echo -n "Updated default_smp_affinity to: " cat /proc/irq/default_smp_affinity echo -n "Checking perf_event_max_sample_rate: " cat /proc/sys/kernel/perf_event_max_sample_rate echo 1 | sudo tee /proc/sys/kernel/perf_event_max_sample_rate > /dev/null echo -n "Updated perf_event_max_sample_rate to: " cat /proc/sys/kernel/perf_event_max_sample_rate - name: Benchmark hash functions run: | mkdir var taskset -c 2,3 python benchmark/benchmark.py \ -o var/mmh3_base_hash_500.json \ --test-hash mmh3_base_hash \ --test-buffer-size-max "$BENCHMARK_MAX_SIZE" taskset -c 2,3 python benchmark/benchmark.py \ -o var/mmh3_32_500.json \ --test-hash mmh3_32 \ --test-buffer-size-max "$BENCHMARK_MAX_SIZE" pip uninstall -y mmh3 pip install mmh3==4.1.0 taskset -c 2,3 python benchmark/benchmark.py \ -o var/mmh3_base_hash_410.json \ --test-hash mmh3_base_hash \ --test-buffer-size-max "$BENCHMARK_MAX_SIZE" - name: Reset the system from benchmarking run: | echo -n "Checking perf_event_max_sample_rate: " cat /proc/sys/kernel/perf_event_max_sample_rate echo 100000 | sudo tee /proc/sys/kernel/perf_event_max_sample_rate > /dev/null echo -n "Updated perf_event_max_sample_rate to: " cat /proc/sys/kernel/perf_event_max_sample_rate echo -n "Checking default_smp_affinity: " cat /proc/irq/default_smp_affinity echo f | sudo tee /proc/irq/default_smp_affinity > /dev/null echo -n "Updated default_smp_affinity to: " cat /proc/irq/default_smp_affinity echo "Restarting irqbalance..." 
sudo systemctl restart irqbalance systemctl status irqbalance - name: Upload artifacts uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 with: name: benchmark-results path: var ================================================ FILE: .github/workflows/benchmark.yml ================================================ --- name: Benchmark on: workflow_dispatch: permissions: {} jobs: benchmark: permissions: contents: read packages: read runs-on: ubuntu-24.04 env: BENCHMARK_MAX_SIZE: 262144 steps: - name: Checkout uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: persist-credentials: false - name: Set up Python uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: python-version: "3.14" - name: Install dependencies run: | pip install --upgrade pip pip install . pip install ".[benchmark]" - name: Tune the system for benchmarking run: | echo "Running \"lscpu -a -e\"..." lscpu -a -e echo -n "Checking randomize_va_space: " cat /proc/sys/kernel/randomize_va_space echo "randomize_va_space should be 2, meaning ASLR is fully enabled." systemctl status irqbalance echo "Stopping irqbalance..." 
sudo systemctl stop irqbalance echo -n "Checking default_smp_affinity: " cat /proc/irq/default_smp_affinity echo 3 | sudo tee /proc/irq/default_smp_affinity > /dev/null echo -n "Updated default_smp_affinity to: " cat /proc/irq/default_smp_affinity echo -n "Checking perf_event_max_sample_rate: " cat /proc/sys/kernel/perf_event_max_sample_rate echo 1 | sudo tee /proc/sys/kernel/perf_event_max_sample_rate > /dev/null echo -n "Updated perf_event_max_sample_rate to: " cat /proc/sys/kernel/perf_event_max_sample_rate - name: Benchmark hash functions run: | mkdir var declare -a hash_list=("mmh3_32" "mmh3_128" "xxh_32" "xxh_64" \ "xxh3_64" "xxh3_128" "md5" "sha1") for hash_name in "${hash_list[@]}"; do echo "${hash_name}" taskset -c 2,3 python benchmark/benchmark.py \ -o var/"${hash_name}".json \ --test-hash "${hash_name}" \ --test-buffer-size-max "$BENCHMARK_MAX_SIZE" done - name: Reset the system from benchmarking run: | echo -n "Checking perf_event_max_sample_rate: " cat /proc/sys/kernel/perf_event_max_sample_rate echo 100000 | sudo tee /proc/sys/kernel/perf_event_max_sample_rate > /dev/null echo -n "Updated perf_event_max_sample_rate to: " cat /proc/sys/kernel/perf_event_max_sample_rate echo -n "Checking default_smp_affinity: " cat /proc/irq/default_smp_affinity echo f | sudo tee /proc/irq/default_smp_affinity > /dev/null echo -n "Updated default_smp_affinity to: " cat /proc/irq/default_smp_affinity echo "Restarting irqbalance..." sudo systemctl restart irqbalance systemctl status irqbalance - name: Upload artifacts uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 with: name: benchmark-results path: var ================================================ FILE: .github/workflows/build.yml ================================================ # This workflow is intended for quick building tests. # Use wheels.yml for complete building/uploading tests. 
--- name: Build on: # yamllint disable-line rule:truthy push: branches: "**" pull_request: types: - opened - synchronize - reopened permissions: {} jobs: build: permissions: contents: read packages: read strategy: matrix: os: [macos-26, windows-2025, ubuntu-24.04] python-version: ["3.10", "3.11", "3.12", "3.13", "3.14", "3.14t"] runs-on: ${{ matrix.os }} steps: - name: Checkout uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: persist-credentials: false - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | python -m pip install --upgrade pip pip install setuptools build pip install . pip install ".[test,type]" - name: Test with pytest run: python -m pytest - name: Test type hints with mypy run: mypy --strict tests - name: Test building from the source distribution shell: bash run: | pip uninstall -y mmh3 python -m build --sdist python -m pip install dist/*.tar.gz python -m pytest mypy --strict tests ================================================ FILE: .github/workflows/draft-pdf.yml ================================================ --- name: Draft Paper on: push: branches: - paper workflow_dispatch: permissions: {} jobs: paper: permissions: contents: read packages: read runs-on: ubuntu-latest name: Paper Draft if: github.event_name == 'workflow_dispatch' && github.ref == 'refs/heads/master' steps: - name: Checkout uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: persist-credentials: false - name: Build draft PDF uses: openjournals/openjournals-draft-action@85a18372e48f551d8af9ddb7a747de685fbbb01c # v1.0 with: journal: joss # This should be the path to the paper within your repo. 
paper-path: paper/paper.md - name: Upload uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 with: name: paper # This is the output path where Pandoc will write the compiled # PDF. Note, this should be the same directory as the input # paper.md path: paper/paper.pdf ================================================ FILE: .github/workflows/superlinter.yml ================================================ --- name: Super-Linter on: # yamllint disable-line rule:truthy push: branches: "**" pull_request: types: - opened - synchronize - reopened permissions: {} jobs: # Set the job key. The key is displayed as the job name # when a job name is not provided super-lint: # Name the Job name: Lint code base # Set the type of machine to run on runs-on: ubuntu-latest permissions: contents: read packages: read # To report GitHub Actions status checks statuses: write steps: # Checks out a copy of your repository on the ubuntu-latest machine - name: Checkout code uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: persist-credentials: false # super-linter needs the full git history to get the # list of files that changed across commits fetch-depth: 0 # Runs the Super-Linter action - name: Run Super-Linter uses: super-linter/super-linter@61abc07d755095a68f4987d1c2c3d1d64408f1f9 # v8.5.0 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} LINTER_RULES_PATH: / GITHUB_ACTIONS_CONFIG_FILE: .github/actionlint.yml PYTHON_PYLINT_CONFIG_FILE: pyproject.toml PYTHON_RUFF_CONFIG_FILE: pyproject.toml PYTHON_RUFF_FORMAT_CONFIG_FILE: pyproject.toml # Suppressed because it conflicts with clang-format in some cases VALIDATE_CPP: false # Suppressed because copy/paste is sometimes required at low level VALIDATE_JSCPD: false # Suppressed in favor of Ruff VALIDATE_PYTHON_BLACK: false VALIDATE_PYTHON_FLAKE8: false VALIDATE_PYTHON_ISORT: false # Suppressed because it even accuses book titles VALIDATE_NATURAL_LANGUAGE: false # Suppressed because it does 
not honor the ignore-paths option VALIDATE_PYTHON_PYLINT: false # super-linter 7 does not honor the ignore-paths option of pylint # so we run pylint separately - name: Set up Python uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: python-version: "3.14" - name: Run pylint run: | pip install pylint pylint --recursive=y . ================================================ FILE: .github/workflows/wheels.yml ================================================ --- name: Wheel-Builder on: push: tags: - "v*.*.*" workflow_dispatch: permissions: {} jobs: build_wheels: name: Build wheel for ${{ matrix.platform }} ${{ matrix.archs }} ${{ matrix.build }} (runs on ${{ matrix.os }}) runs-on: ${{ matrix.os }} strategy: matrix: os: [ubuntu-24.04] archs: [x86_64, i686, aarch64, ppc64le, s390x] build: [manylinux, musllinux] include: - os: windows-2025 archs: AMD64 - os: windows-2025 archs: x86 - os: windows-2025 archs: ARM64 - os: macos-26-intel archs: x86_64 - os: macos-26 archs: arm64 - os: macos-26 archs: universal2 - os: ubuntu-24.04 platform: android archs: x86_64 build: android - os: macos-26 platform: android archs: arm64_v8a build: android - os: macos-26 platform: ios archs: arm64_iphoneos - os: macos-26 platform: ios archs: arm64_iphonesimulator - os: macos-26-intel platform: ios archs: x86_64_iphonesimulator steps: - name: Checkout uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: persist-credentials: false - name: Set up Python uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: python-version: "3.14" - name: Set dev version for TestPyPI if: github.event_name == 'workflow_dispatch' shell: python run: | import re, datetime timestamp = datetime.datetime.now(datetime.timezone.utc).strftime("%Y%m%d%H%M") text = open("pyproject.toml", encoding="utf-8").read() m = re.search(r'version\s*=\s*"(.+?)"', text) if not m: raise RuntimeError("version field not found in pyproject.toml") version = 
m.group(1) base_version = version.split("-")[0] new_text = re.sub( r'version\s*=\s*".*?"', f'version = "{base_version}.dev{timestamp}"', text, count=1 ) open("pyproject.toml", "w", encoding="utf-8").write(new_text) - name: Set up QEMU if: runner.os == 'Linux' && matrix.platform != 'android' uses: docker/setup-qemu-action@c7c53464625b32c7a7e944ae62b3e17d2b600130 # v3.7.0 # https://github.blog/changelog/2024-04-02-github-actions-hardware-accelerated-android-virtualization-now-available/ - name: Set up KVM for Android emulation if: runner.os == 'Linux' && matrix.platform == 'android' run: | echo 'KERNEL=="kvm", GROUP="kvm", MODE="0666", OPTIONS+="static_node=kvm"' | sudo tee /etc/udev/rules.d/99-kvm4all.rules sudo udevadm control --reload-rules sudo udevadm trigger --name-match=kvm - name: Build wheels uses: pypa/cibuildwheel@ee02a1537ce3071a004a6b08c41e72f0fdc42d9a # v3.4.0 with: output-dir: wheelhouse env: CIBW_BUILD: "{cp310,cp311,cp312,cp313,cp314,cp314t}-${{ matrix.build }}*" CIBW_PLATFORM: ${{ matrix.platform || 'auto' }} CIBW_ARCHS: ${{ matrix.archs }} CIBW_BUILD_FRONTEND: "build" CIBW_TEST_REQUIRES: "pytest" CIBW_TEST_SOURCES_ANDROID: "./tests" CIBW_TEST_SOURCES_IOS: "./tests" CIBW_TEST_COMMAND: "pytest {project}" CIBW_TEST_COMMAND_ANDROID: "python -m pytest ./tests" CIBW_TEST_COMMAND_IOS: "python -m pytest ./tests" CIBW_TEST_SKIP: "*-win_arm64 *-android_arm64_v8a" - uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 with: name: Wheel-${{ matrix.os }}-${{ matrix.platform }}-${{ matrix.build }}${{ matrix.archs }} path: ./wheelhouse/*.whl build_sdist: name: Build a source distribution runs-on: ubuntu-24.04 steps: - name: Checkout uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: persist-credentials: false - name: Set up Python uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: python-version: "3.14" - name: Set dev version for TestPyPI if: github.event_name == 
'workflow_dispatch' shell: python run: | import re, datetime timestamp = datetime.datetime.now(datetime.timezone.utc).strftime("%Y%m%d%H%M") text = open("pyproject.toml", encoding="utf-8").read() m = re.search(r'version\s*=\s*"(.+?)"', text) if not m: raise RuntimeError("version field not found in pyproject.toml") version = m.group(1) base_version = version.split("-")[0] new_text = re.sub( r'version\s*=\s*".*?"', f'version = "{base_version}.dev{timestamp}"', text, count=1 ) open("pyproject.toml", "w", encoding="utf-8").write(new_text) - name: Build sdist run: | python -m pip install --upgrade pip pip install setuptools build python -m build --sdist - name: Test building from the source distribution shell: bash run: | pip install ".[test,type]" pip uninstall -y mmh3 python -m pip install dist/*.tar.gz python -m pytest mypy --strict tests - uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 with: path: dist/*.tar.gz publish-to-pypi: name: "Publish artifacts to PyPI" if: startsWith(github.ref, 'refs/tags/') needs: [build_wheels, build_sdist] runs-on: ubuntu-24.04 environment: name: pypi url: https://pypi.org/p/mmh3 permissions: id-token: write # IMPORTANT: this permission is mandatory for trusted publishing steps: - name: Set up built items uses: actions/download-artifact@70fc10c6e5e1ce46ad2ea6f2b72d43f7d47b13c3 # v8.0.0 with: path: dist merge-multiple: true - name: Publish package distributions to PyPI uses: pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e # v1.13.0 publish-to-testpypi: name: "Publish artifacts to TestPyPI" if: github.event_name == 'workflow_dispatch' && github.ref == 'refs/heads/master' needs: [build_wheels, build_sdist] runs-on: ubuntu-24.04 environment: name: testpypi url: https://test.pypi.org/p/mmh3 permissions: id-token: write # IMPORTANT: this permission is mandatory for trusted publishing steps: - name: Set up built items uses: actions/download-artifact@70fc10c6e5e1ce46ad2ea6f2b72d43f7d47b13c3 
# v8.0.0 with: path: dist merge-multiple: true - name: Publish package distributions to TestPyPI uses: pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e # v1.13.0 with: repository-url: https://test.pypi.org/legacy/ ================================================ FILE: .gitignore ================================================ # From https://github.com/github/gitignore # CC0 1.0 Universal # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ *.egg-info/ .installed.cfg *.egg # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover .hypothesis/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder target/ # Jupyter Notebook .ipynb_checkpoints # pyenv .python-version # celery beat schedule file celerybeat-schedule # SageMath parsed files *.sage.py # Environments .env .venv env/ venv/ ENV/ # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # pytest .pytest_cache/ # mypy .mypy_cache/ # macOS .DS_Store # vscode .vscode/ # Directory from an external source created by git submodule update --init # Adding this path is useful for other tools like markdwonlint and prettier util/smhasher/ ================================================ FILE: .gitmodules ================================================ [submodule "src/mmh3/_mmh3/smhasher"] path = util/smhasher url = https://github.com/aappleby/smhasher.git 
================================================ FILE: .markdown-lint.yml ================================================ # MD024/no-duplicate-heading : Multiple headings with the same content : https://github.com/DavidAnson/markdownlint/blob/v0.34.0/doc/md024.md MD024: # Only check sibling headings (default is false) # Set to true to conform to the Keep a Changelog format # See also https://github.com/olivierlacan/keep-a-changelog/issues/274#issuecomment-484065486 siblings_only: true ================================================ FILE: .readthedocs.yml ================================================ # Read the Docs configuration file for MkDocs projects # See https://docs.readthedocs.io/en/stable/config-file/v2.html for detail # Required version: 2 # Set the version of Python and other tools you might need build: os: ubuntu-22.04 tools: python: "3.12" sphinx: builder: html configuration: docs/conf.py # Build all formats formats: - pdf - epub # Optionally declare the Python requirements required to build your docs python: install: - method: pip path: . extra_requirements: - docs ================================================ FILE: CHANGELOG.md ================================================ # Changelog All notable changes to this project will be documented here. For a list of contributors, see the [Contributors](https://mmh3.readthedocs.io/en/stable/CONTRIBUTORS.html) page. The format is based on [Keep a Changelog 1.1.0](https://keepachangelog.com/en/1.1.0/). This project has adhered to [Semantic Versioning 2.0.0](https://semver.org/spec/v2.0.0.html) since version 3.0.0. ## [5.2.1] - 2026-03-06 ### Added - Add support for the Android wheel for Python 3.14. ### Removed - Drop support for Python 3.9, as it has reached the end of life on 2025-10-31. ## [5.2.0] - 2025-07-29 ### Added - Add support for Python 3.14, including 3.14t (no-GIL) wheels. However, thread safety for the no-GIL variant is not fully tested yet. 
Please report any issues you encounter ([#134](https://github.com/hajimes/mmh3/pull/134), [#136](https://github.com/hajimes/mmh3/pull/136)). - Add support for Android (Python 3.13 only) and iOS (Python 3.13 and 3.14) wheels, enabled by the major version update of [cibuildwheel](https://github.com/pypa/cibuildwheel) ([#135](https://github.com/hajimes/mmh3/pull/135)). ## [5.1.0] - 2025-01-25 ### Added - Improve the performance of `hash128()`, `hash64()`, and `hash_bytes()` by using [METH_FASTCALL](https://docs.python.org/3/c-api/structures.html#c.METH_FASTCALL), reducing the overhead of function calls ([#116](https://github.com/hajimes/mmh3/pull/116)). - Add the software paper for this library ([doi:10.21105/joss.06124](https://doi.org/10.21105/joss.06124)), following its publication in the [_Journal of Open Source Software_](https://joss.theoj.org) ([#118](https://github.com/hajimes/mmh3/pull/118)). ### Removed - Drop support for Python 3.8, as it has reached the end of life on 2024-10-07 ([#117](https://github.com/hajimes/mmh3/pull/117)). ## [5.0.1] - 2024-09-22 ### Fixed - Fix the issue that the package cannot be built from the source distribution ([#90](https://github.com/hajimes/mmh3/issues/90)). ## [5.0.0] - 2024-09-18 ### Added - Add support for Python 3.13. - Improve the performance of the `hash()` function with [METH_FASTCALL](https://docs.python.org/3/c-api/structures.html#c.METH_FASTCALL), reducing the overhead of function calls. For data sizes between 1–2 KB (e.g., 48x48 favicons), performance is 10%–20% faster. For smaller data (~500 bytes, like 16x16 favicons), performance increases by approximately 30% ([#87](https://github.com/hajimes/mmh3/pull/87)). - Add `digest` functions that support the new buffer protocol ([PEP 688](https://peps.python.org/pep-0688/)) as input ([#75](https://github.com/hajimes/mmh3/pull/75)). These functions are implemented with `METH_FASTCALL` too, offering improved performance ([#84](https://github.com/hajimes/mmh3/pull/84)). 
- Slightly improve the performance of the `hash_bytes()` function ([#88](https://github.com/hajimes/mmh3/pull/88)) - Add Read the Docs documentation ([#54](https://github.com/hajimes/mmh3/issues/54)). - Document benchmark results ([#53](https://github.com/hajimes/mmh3/issues/53)). ### Changed - **Backward-incompatible**: The `seed` argument is now strictly validated to ensure it falls within the range [0, 0xFFFFFFFF]. A `ValueError` is raised if the seed is out of range ([#84](https://github.com/hajimes/mmh3/pull/84)). - **Backward-incompatible**: Change the constructors of hasher classes to accept a buffer as the first argument ([#83](https://github.com/hajimes/mmh3/pull/83)). - The type of flag arguments has been changed from `bool` to `Any` ([#84](https://github.com/hajimes/mmh3/pull/84)). - Change the format of CHANGELOG.md to conform to the [Keep a Changelog](https://keepachangelog.com/en/1.1.0/) standard ([#63](https://github.com/hajimes/mmh3/pull/63)). ### Deprecated - Deprecate the `hash_from_buffer()` function. Use `mmh3_32_sintdigest()` or `mmh3_32_uintdigest()` as alternatives ([#84](https://github.com/hajimes/mmh3/pull/84)). ### Fixed - Fix a reference leak in the `hash_from_buffer()` function ([#75](https://github.com/hajimes/mmh3/pull/75)). - Fix type hints ([#76](https://github.com/hajimes/mmh3/pull/76), [#77](https://github.com/hajimes/mmh3/pull/77), [#84](https://github.com/hajimes/mmh3/pull/84)). ## [4.1.0] - 2024-01-09 ### Added - Add support for Python 3.12. ### Fixed - Fix issues with Bazel by changing the directory structure of the project ([#50](https://github.com/hajimes/mmh3/issues/50)). - Fix incorrect type hints ([#51](https://github.com/hajimes/mmh3/issues/51)). - Fix invalid results on s390x when the arg `x64arch` of `hash64` or `hash_bytes()` is set to `False` ([#52](https://github.com/hajimes/mmh3/issues/52)). ## [4.0.1] - 2023-07-14 ### Changed - Refactor the project structure ([#48](https://github.com/hajimes/mmh3/issues/48)). 
### Fixed - Fix incorrect type hints. ## [4.0.0] - 2023-05-22 ### Added - Add experimental support for `hashlib`-compliant hasher classes ([#39](https://github.com/hajimes/mmh3/issues/39)). Note that they are not yet fully tuned for performance. - Add support for type hints ([#44](https://github.com/hajimes/mmh3/issues/44)). - Add wheels for more platforms (`musllinux`, `s390x`, `win_arm64`, and `macosx_universal2`). - Add a code of conduct (the ACM Code of Ethics and Professional Conduct). ### Changed - Switch license from CC0 to MIT ([#43](https://github.com/hajimes/mmh3/issues/43)). ### Removed - **Backward-incompatible**: A hash function now returns the same value under big-endian platforms as that under little-endian ones ([#47](https://github.com/hajimes/mmh3/issues/47)). - **Backward-incompatible**: Remove the `__version__` constant from the module ([#42](https://github.com/hajimes/mmh3/issues/42)). Use `importlib.metadata` instead. - Drop support for Python 3.7, as it will reach the end of life on 2023-06-27. ## [3.1.0] - 2023-03-24 ### Added - Add support for Python 3.10 and 3.11 ([#35](https://github.com/hajimes/mmh3/pull/35), [#37](https://github.com/hajimes/mmh3/pull/37)). - Add support for 32-bit architectures such as `i686` and `armv7l`. From now on, `hash()` and `hash_from_buffer()` on these architectures will generate the same hash values as those on other environments ([#40](https://github.com/hajimes/mmh3/pull/40)). - In relation to the above, `manylinux2014_i686` wheels are now available. - Support for hashing huge data (>16GB) ([#34](https://github.com/hajimes/mmh3/pull/34)). ### Removed - Drop support for Python 3.6; remove legacy code for Python 2.x at the source code level. ## [3.0.0] - 2021-02-23 ### Added - Python wheels are now available, thanks to the power of [cibuildwheel](https://github.com/joerick/cibuildwheel). 
- Supported platforms are `manylinux1_x86_64`, `manylinux2010_x86_64`, `manylinux2014_aarch64`, `win32`, `win_amd64`, `macosx_10_9_x86_64`, and `macosx_11_0_arm64` (Apple Silicon). - Add support for newer macOS environments ([#22](https://github.com/hajimes/mmh3/pull/22)). - Add support for Python 3.7, 3.8, and 3.9. ### Changed - Migrate CI from Travis CI and AppVeyor to GitHub Actions. ### Removed - Drop support for Python 2.7, 3.3, 3.4, and 3.5. ## [2.5.1] - 2017-10-31 ### Fixed - Bugfix for `hash_bytes()` ([#15](https://github.com/hajimes/mmh3/pull/15)). ## [2.5] - 2017-10-28 ### Added - Add `hash_from_buffer()` ([#13](https://github.com/hajimes/mmh3/pull/13)). - Add a keyword argument `signed`. ## [2.4] - 2017-05-27 ### Added - Support seeds with 32-bit unsigned integers ([#6](https://github.com/hajimes/mmh3/pull/6)). - Support 64-bit data (under 64-bit environments) - Add unit testing and continuous integration with Travis CI and AppVeyor. ### Fixed - Fix compile errors for Python 3.6 under Windows systems. ## [2.3.2] - 2017-05-26 ### Changed - Relicensed from public domain to CC0-1.0. ## [2.3.1] - 2015-06-07 ### Fixed - Fix compile errors for gcc >=5. ## [2.3] - 2013-12-08 ### Added - Add `hash128()`, which returns a 128-bit signed integer ([#3](https://github.com/hajimes/mmh3/pull/3)). ### Fixed - Fix a misplaced operator which could cause memory leak in a rare condition ([#2](https://github.com/hajimes/mmh3/pull/2)). - Fix a malformed value to a Python/C API function which may cause runtime errors in recent Python 3.x versions. ## [2.2] - 2013-03-03 ### Added - Improve portability to support systems with old gcc (version < 4.4) such as CentOS/RHEL 5.x ([#1](https://github.com/hajimes/mmh3/pull/1)). ## [2.1] - 2013-02-25 ### Added - Add `__version__` constant. Check if it exists when the following revision matters for your application. ### Changed - Incorporate the revision r147, which includes robustness improvement and minor tweaks. 
Beware that due to this revision, **the result of the 32-bit version of 2.1 is NOT the same as that of 2.0**.
https://github.com/hajimes/mmh3/releases/tag/v2.0 [1.0]: https://web.archive.org/web/20110430172027/https://linux.softpedia.com/get/Programming/Libraries/mmh3-68314.shtml ================================================ FILE: LICENSE ================================================ MIT License Copyright (c) 2011-2026 Hajime Senuma Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
================================================ FILE: README.md ================================================ # mmh3 [![Documentation Status](https://readthedocs.org/projects/mmh3/badge/?version=stable)](https://mmh3.readthedocs.io/en/stable/) [![GitHub Super-Linter](https://github.com/hajimes/mmh3/actions/workflows/superlinter.yml/badge.svg?branch=master)](https://github.com/hajimes/mmh3/actions?query=workflow%3ASuper-Linter+branch%3Amaster) [![Build](https://github.com/hajimes/mmh3/actions/workflows/build.yml/badge.svg?branch=master)](https://github.com/hajimes/mmh3/actions/workflows/build.yml?branch=master) [![PyPi Version](https://img.shields.io/pypi/v/mmh3.svg?style=flat-square&logo=pypi&logoColor=white)](https://pypi.org/project/mmh3/) [![Python Versions](https://img.shields.io/pypi/pyversions/mmh3.svg)](https://pypi.org/project/mmh3/) [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](https://opensource.org/license/mit/) [![Total Downloads](https://static.pepy.tech/badge/mmh3)](https://pepy.tech/projects/mmh3?versions=*%2C5.*%2C4.*%2C3.*%2C2.*) [![Recent Downloads](https://static.pepy.tech/badge/mmh3/month)](https://pepy.tech/projects/mmh3?versions=*%2C5.*%2C4.*%2C3.*%2C2.*) [![DOI](https://joss.theoj.org/papers/10.21105/joss.06124/status.svg)](https://doi.org/10.21105/joss.06124) `mmh3` is a Python extension for [MurmurHash (MurmurHash3)](https://en.wikipedia.org/wiki/MurmurHash), a set of fast and robust non-cryptographic hash functions invented by Austin Appleby. By combining `mmh3` with probabilistic techniques like [Bloom filter](https://en.wikipedia.org/wiki/Bloom_filter), [MinHash](https://en.wikipedia.org/wiki/MinHash), and [feature hashing](https://en.wikipedia.org/wiki/Feature_hashing), you can develop high-performance systems in fields such as data mining, machine learning, and natural language processing. 
Another popular use of `mmh3` is to [calculate favicon hashes](https://gist.github.com/yehgdotnet/b9dfc618108d2f05845c4d8e28c5fc6a), which are utilized by [Shodan](https://www.shodan.io), the world's first IoT search engine. This page provides a quick start guide. For more comprehensive information, please refer to the [documentation](https://mmh3.readthedocs.io/en/stable/). ## Installation ```shell pip install mmh3 ``` ## Usage ### Basic usage ```pycon >>> import mmh3 >>> mmh3.hash(b"foo") # returns a 32-bit signed int -156908512 >>> mmh3.hash("foo") # accepts str (UTF-8 encoded) -156908512 >>> mmh3.hash(b"foo", 42) # uses 42 as the seed -1322301282 >>> mmh3.hash(b"foo", 0, False) # returns a 32-bit unsigned int 4138058784 ``` `mmh3.mmh3_x64_128_digest()`, introduced in version 5.0.0, efficienlty hashes buffer objects that implement the buffer protocol ([PEP 688](https://peps.python.org/pep-0688/)) without internal memory copying. The function returns a `bytes` object of 16 bytes (128 bits). It is particularly suited for hashing large memory views, such as `bytearray`, `memoryview`, and `numpy.ndarray`, and performs faster than the 32-bit variants like `hash()` on 64-bit machines. ```pycon >>> mmh3.mmh3_x64_128_digest(numpy.random.rand(100)) b'\x8c\xee\xc6z\xa9\xfeR\xe8o\x9a\x9b\x17u\xbe\xdc\xee' ``` Various alternatives are available, offering different return types (e.g., signed integers, tuples of unsigned integers) and optimized for different architectures. For a comprehensive list of functions, refer to the [API Reference](https://mmh3.readthedocs.io/en/stable/api.html). ### `hashlib`-style hashers `mmh3` implements hasher objects with interfaces similar to those in `hashlib` from the standard library, although they are still experimental. See [Hasher Classes](https://mmh3.readthedocs.io/en/stable/api.html#hasher-classes) in the API Reference for more information. 
## Changelog See [Changelog (latest version)](https://mmh3.readthedocs.io/en/latest/changelog.html) for the complete changelog. ### [5.2.1] - 2026-03-06 #### Added - Add support for the Android wheel for Python 3.14. #### Removed - Drop support for Python 3.9, as it has reached the end of life on 2025-10-31. ### [5.2.0] - 2025-07-29 #### Added - Add support for Python 3.14, including 3.14t (no-GIL) wheels. However, thread safety for the no-GIL variant is not fully tested yet. Please report any issues you encounter ([#134](https://github.com/hajimes/mmh3/pull/134), [#136](https://github.com/hajimes/mmh3/pull/136)). - Add support for Android (Python 3.13 only) and iOS (Python 3.13 and 3.14) wheels, enabled by the major version update of [cibuildwheel](https://github.com/pypa/cibuildwheel) ([#135](https://github.com/hajimes/mmh3/pull/135)). ### [5.1.0] - 2025-01-25 #### Added - Improve the performance of `hash128()`, `hash64()`, and `hash_bytes()` by using [METH_FASTCALL](https://docs.python.org/3/c-api/structures.html#c.METH_FASTCALL), reducing the overhead of function calls ([#116](https://github.com/hajimes/mmh3/pull/116)). - Add the software paper for this library ([doi:10.21105/joss.06124](https://doi.org/10.21105/joss.06124)), following its publication in the [_Journal of Open Source Software_](https://joss.theoj.org) ([#118](https://github.com/hajimes/mmh3/pull/118)). #### Removed - Drop support for Python 3.8, as it has reached the end of life on 2024-10-07 ([#117](https://github.com/hajimes/mmh3/pull/117)). ## License [MIT](https://github.com/hajimes/mmh3/blob/master/LICENSE), unless otherwise noted within a file. ## Frequently Asked Questions ### Different results from other MurmurHash3-based libraries By default, `mmh3` returns **signed** values for the 32-bit and 64-bit versions and **unsigned** values for `hash128` due to historical reasons. To get the desired result, use the `signed` keyword argument. 
Starting from version 4.0.0, **`mmh3` is endian-neutral**, meaning that its hash functions return the same values on big-endian platforms as they do on little-endian ones. In contrast, the original C++ library by Appleby is endian-sensitive. If you need results that comply with the original library on big-endian systems, please use version 3.\*. For compatibility with [Google Guava (Java)](https://github.com/google/guava), see . For compatibility with [murmur3 (Go)](https://pkg.go.dev/github.com/spaolacci/murmur3), see . ### Handling errors with negative seeds From the version 5.0.0, `mmh3` functions accept only **unsigned** 32-bit integer seeds to enable faster type-checking and conversion. However, this change may cause issues if you need to calculate hash values using negative seeds within the range of signed 32-bit integers. For instance, [Telegram-iOS](https://github.com/TelegramMessenger/Telegram-iOS) uses `-137723950` as a hard-coded seed (bitwise equivalent to `4157243346`). To handle such cases, you can convert a signed 32-bit integer to its unsigned equivalent by applying a bitwise AND operation with `0xffffffff`. Here's an example: ```pycon >>> mmh3.hash(b"quux", 4294967295) 258499980 >>> d = -1 >>> mmh3.hash(b"quux", d & 0xffffffff) 258499980 ``` Alternatively, if the seed is hard-coded (as in the Telegram-iOS case), you can precompute the unsigned value for simplicity. ## Contributing Guidelines See [Contributing](https://mmh3.readthedocs.io/en/stable/CONTRIBUTING.html). ## Authors MurmurHash3 was originally developed by Austin Appleby and distributed under public domain [https://github.com/aappleby/smhasher](https://github.com/aappleby/smhasher). Ported and modified for Python by Hajime Senuma. ## External Tutorials ### High-performance computing The following textbooks and tutorials are great resources for learning how to use `mmh3` (and other hash algorithms in general) for high-performance computing. 
- Chapter 11: _Using Less Ram_ in Micha Gorelick and Ian Ozsvald. 2014. _High Performance Python: Practical Performant Programming for Humans_. O'Reilly Media. [ISBN: 978-1-4493-6159-4](https://www.amazon.com/dp/1449361595). - 3rd edition of the above (2025). [ISBN: 978-1098165963](https://www.amazon.com/dp/1098165969/). - Max Burstein. February 2, 2013. _[Creating a Simple Bloom Filter](http://www.maxburstein.com/blog/creating-a-simple-bloom-filter/)_. - Duke University. April 14, 2016. _[Efficient storage of data in memory](http://people.duke.edu/~ccc14/sta-663-2016/20B_Big_Data_Structures.html)_. - Bugra Akyildiz. August 24, 2016. _[A Gentle Introduction to Bloom Filter](https://www.kdnuggets.com/2016/08/gentle-introduction-bloom-filter.html)_. KDnuggets. ### Internet of things [Shodan](https://www.shodan.io), the world's first [IoT](https://en.wikipedia.org/wiki/Internet_of_things) search engine, uses MurmurHash3 hash values for [favicons](https://en.wikipedia.org/wiki/Favicon) (icons associated with web pages). [ZoomEye](https://www.zoomeye.org) follows Shodan's convention. [Calculating these values with mmh3](https://gist.github.com/yehgdotnet/b9dfc618108d2f05845c4d8e28c5fc6a) is useful for OSINT and cybersecurity activities. - Jan Kopriva. April 19, 2021. _[Hunting phishing websites with favicon hashes](https://isc.sans.edu/diary/Hunting+phishing+websites+with+favicon+hashes/27326)_. SANS Internet Storm Center. - Nikhil Panwar. May 2, 2022. _[Using Favicons to Discover Phishing & Brand Impersonation Websites](https://bolster.ai/blog/how-to-use-favicons-to-find-phishing-websites)_. Bolster. - Faradaysec. July 25, 2022. _[Understanding Spring4Shell: How used is it?](https://faradaysec.com/understanding-spring4shell/)_. Faraday Security. - Debjeet. August 2, 2022. _[How To Find Assets Using Favicon Hashes](https://payatu.com/blog/favicon-hash/)_. Payatu. 
"""Benchmark module for various hash functions."""

import itertools
import math
import random
import time
from collections.abc import Callable
from typing import Final

K1: Final[int] = 0b1001111000110111011110011011000110000101111010111100101010000111
K2: Final[int] = 0b1100001010110010101011100011110100100111110101001110101101001111
MASK: Final[int] = 0xFFFFFFFFFFFFFFFF


def init_buffer(ba: bytearray) -> bytearray:
    """Initializes a byte array with a deterministic pseudo-random pattern.

    The pattern follows the one used by xxHash's benchmarking code:
    https://github.com/Cyan4973/xxHash/blob/dev/tests/bench/benchHash.c

    Args:
        ba: The byte array to initialize.

    Returns:
        The same byte array, filled in place.
    """
    state = K2
    for idx, _ in enumerate(ba):
        state = (state * K1) & MASK
        # Keep only the most significant byte of the 64-bit state.
        ba[idx] = state >> 56
    return ba


def generate_size(size: int, p: float) -> int:
    """Generate a random buffer size within +/- ``p`` of ``size``.

    Args:
        size: The size of the buffer to hash.
        p: The percentage of the buffer size to vary.

    Returns:
        The random size of the buffer.
    """
    low = math.ceil(size * (1 - p))
    high = math.floor(size * (1 + p))
    return random.randint(low, high)


def perf_hash(loops: int, f: Callable, size: int) -> float:
    """Benchmark a hash function on fixed-size, shifted buffers.

    Args:
        loops: The number of outer loops to run.
        f: The hash function to benchmark.
        size: The size of the buffer to hash.

    Returns:
        The time taken to hash the buffer in fractional seconds.

    Raises:
        ValueError: If ``size`` is not positive.
    """
    if size <= 0:
        raise ValueError("size must be greater than 0")

    repeats = itertools.repeat(None, loops)
    buf = init_buffer(bytearray(size + 9))
    # Ten copies of the data at byte offsets 0..9, so alignment varies.
    chunks = [bytes(buf[off : off + size]) for off in range(10)]

    start = time.perf_counter()
    for _ in repeats:
        for chunk in chunks:
            f(chunk)
    return time.perf_counter() - start


def perf_hash_random(loops: int, f: Callable, size: int) -> float:
    """Benchmark a hash function with varying data sizes.

    Args:
        loops: The number of outer loops to run.
        f: The hash function to benchmark.
        size: The size of the buffer to hash.

    Returns:
        The time taken to hash the buffer in fractional seconds.

    Raises:
        ValueError: If ``size`` is not positive.
    """
    if size <= 0:
        raise ValueError("size must be greater than 0")

    repeats = itertools.repeat(None, loops)
    random.seed(42)
    inner_loops = 10
    extra_size = 255

    buf = init_buffer(bytearray(size + extra_size))
    # Draw all offsets first, then all lengths, so the random stream is
    # consumed in a fixed, reproducible order.
    offsets = [random.randint(0, extra_size) for _ in range(inner_loops)]
    lengths = [generate_size(size, 0.1) for _ in range(inner_loops)]
    chunks = [
        bytes(buf[off : off + length])
        for off, length in zip(offsets, lengths)
    ]

    start = time.perf_counter()
    for _ in repeats:
        for chunk in chunks:
            f(chunk)
    return time.perf_counter() - start


def perf_hash_latency(loops: int, f: Callable, size: int) -> float:
    """Benchmark a hash function with overhead costs with varying data sizes.

    Based on xxHash's ``benchLatency`` function:
    https://github.com/Cyan4973/xxHash/blob/dev/tests/bench/benchHash.c

    Args:
        loops: The number of outer loops to run.
        f: The hash function to benchmark; its result must be indexable
            and ``result[0]`` must be a small integer (e.g. a digest byte).
        size: The size of the buffer to hash.

    Returns:
        The time taken to hash the buffer in fractional seconds.

    Raises:
        ValueError: If ``size`` is not positive.
    """
    if size <= 0:
        raise ValueError("size must be greater than 0")

    repeats = itertools.repeat(None, loops)
    random.seed(42)
    lengths = [generate_size(size, 0.1) for _ in range(10)]

    buf = bytearray(math.floor(size * 1.1) + 255)
    view_to_hash = memoryview(bytes(init_buffer(buf)))

    # Each call's first output byte becomes the next slice offset, creating
    # a serial dependency that exposes per-call latency.
    pos = 0
    start = time.perf_counter()
    for _ in repeats:
        for length in lengths:
            pos = f(view_to_hash[pos : pos + length])[0]
    return time.perf_counter() - start


def add_cmdline_args(cmd: list, args) -> None:
    """Add command line arguments to the runner.

    Args:
        cmd: The command line arguments to extend.
        args: The parsed command line arguments.
    """
    cmd += [
        "--test-hash",
        args.test_hash,
        "--test-type",
        args.test_type,
        "--test-buffer-size-max",
        str(args.test_buffer_size_max),
    ]
import hashlib

import pymmh3
import pyperf
import xxhash

import mmh3

# Mapping from benchmark name to the hash callable under test.
#
# The getattr fallbacks let the benchmark also run against mmh3 4.1.0,
# which predates the *_digest functions, so the current implementation can
# be compared with the old one. They should be removed in the future.
HASHES = {
    "mmh3_base_hash": mmh3.hash,
    "mmh3_32": getattr(mmh3, "mmh3_32_digest", mmh3.hash_bytes),
    "mmh3_128": getattr(mmh3, "mmh3_x64_128_digest", mmh3.hash128),
    "xxh_32": xxhash.xxh32_digest,
    "xxh_64": xxhash.xxh64_digest,
    "xxh3_64": xxhash.xxh3_64_digest,
    "xxh3_128": xxhash.xxh3_128_digest,
    "md5": lambda ba: hashlib.md5(ba).digest(),
    "sha1": lambda ba: hashlib.sha1(ba).digest(),
    "pymmh3_32": pymmh3.hash,
    "pymmh3_128": pymmh3.hash128,
}

# Mapping from benchmark-type name to the corresponding timing routine.
BENCHMARKING_TYPES = {
    "naive": perf_hash,
    "random": perf_hash_random,
    "latency": perf_hash_latency,
}

if __name__ == "__main__":
    runner = pyperf.Runner(add_cmdline_args=add_cmdline_args)
    runner.argparser.add_argument(
        "--test-hash",
        type=str,
        help="Type of hash function to benchmark",
        required=True,
        choices=HASHES.keys(),
    )
    runner.argparser.add_argument(
        "--test-type",
        type=str,
        help="Type of benchmarking to perform (experimental)",
        choices=BENCHMARKING_TYPES.keys(),
        default="random",
    )
    runner.argparser.add_argument(
        "--test-buffer-size-max",
        type=int,
        help="The maximum size of the buffer to hash (default: 1024)",
        default=1024,
    )
    process_args = runner.parse_args()

    # Benchmark at Fibonacci-spaced buffer sizes up to the requested maximum.
    size, successor = 1, 2
    while size <= process_args.test_buffer_size_max:
        runner.bench_time_func(
            f"{size} bytes",
            BENCHMARKING_TYPES[process_args.test_type],
            HASHES[process_args.test_hash],
            size,
            inner_loops=10,
        )
        size, successor = successor, size + successor
""" import argparse import hashlib import os from typing import TypeVar import pandas as pd import pyperf import xxhash import mmh3 T = TypeVar("T") def pad_with_nan(data: dict[T, list[float]]) -> dict[T, list[float]]: """Pad the data with NaN values to make the length of all lists equal. Args: data: The data to pad. Returns: The padded data. """ max_len = max(len(v) for v in data.values()) for k, v in data.items(): data[k] = v + [float("nan")] * (max_len - len(v)) return data def ordered_intersection(list1: list[T], list2: list[T]) -> list[T]: """Return the intersection of two lists in the order of the first list. Args: list1: The first list. list2: The second list. Returns: The intersection of the two lists in the order of the first list. """ return [item for item in list1 if item in list2] DIGEST_SIZES = { "mmh3_base_hash": mmh3.mmh3_32().digest_size, "mmh3_32": mmh3.mmh3_32().digest_size, "mmh3_128": mmh3.mmh3_x64_128().digest_size, "xxh_32": xxhash.xxh32().digest_size, "xxh_64": xxhash.xxh64().digest_size, "xxh3_64": xxhash.xxh3_64().digest_size, "xxh3_128": xxhash.xxh3_128().digest_size, "md5": hashlib.md5().digest_size, "sha1": hashlib.sha1().digest_size, "pymmh3_32": mmh3.mmh3_32().digest_size, "pymmh3_128": mmh3.mmh3_x64_128().digest_size, } XXHASH_REFERENCE = { "mmh3_32": 3.9, "mmh3_128": None, "xxh_32": 9.7, "xxh_64": 9.1, "xxh3_64": 31.5, "xxh3_128": 29.6, "md5": 0.6, "sha1": 0.8, } if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("filenames", nargs="+") args = parser.parse_args() result_latency: dict[str, list[float]] = {} index: list[int] = [] for file_name in args.filenames: suite = pyperf.BenchmarkSuite.load(file_name) base_name = os.path.basename(file_name) hash_name = os.path.splitext(base_name)[0] result_latency[hash_name] = [] index = [] for bench_name in suite.get_benchmark_names(): bench = suite.get_benchmark(bench_name) data_size = int(bench_name.split(" ")[0]) index.append(data_size) latency_seconds = 
bench.median() result_latency[hash_name].append(latency_seconds) result_latency = pad_with_nan(result_latency) ordered_hash_names = ordered_intersection( list(DIGEST_SIZES.keys()), list(result_latency.keys()) ) df_latency = pd.DataFrame(result_latency, index=index) df_latency = df_latency[ordered_hash_names] df_t = df_latency.copy() df_t = df_t[df_t.index <= 256] small_data_velocity = 0.000001 / df_t.mean() max_row = df_latency.iloc[-1] max_row = float(index[-1]) / max_row max_row = max_row / (2**30) input_bandwidth_df = pd.DataFrame(max_row) input_bandwidth_df.index.name = "Hash" input_bandwidth_df.columns = ["Bandwidth"] digest_size_series = pd.Series(DIGEST_SIZES)[ordered_hash_names] input_bandwidth_df["Width"] = digest_size_series * 8 input_bandwidth_df.sort_values("Bandwidth", ascending=False, inplace=True) input_bandwidth_df = input_bandwidth_df[["Width", "Bandwidth"]] input_bandwidth_df["Small Data Velocity"] = small_data_velocity input_bandwidth_df["✕ Width"] = ( input_bandwidth_df["Width"] * input_bandwidth_df["Small Data Velocity"] ).round(0) input_bandwidth_df["cf. Collet (2020)"] = pd.Series(XXHASH_REFERENCE) # Prettify the table input_bandwidth_df["Bandwidth"] = input_bandwidth_df["Bandwidth"].map( lambda x: f"{x:.2f} GiB/s" ) input_bandwidth_df["Small Data Velocity"] = input_bandwidth_df[ "Small Data Velocity" ].map(lambda x: f"{x:.2f}") input_bandwidth_df["cf. Collet (2020)"] = input_bandwidth_df[ "cf. 
Collet (2020)" ].map(lambda x: f"{x:.1f} GiB/s" if pd.notna(x) else "N/A") print(input_bandwidth_df.to_markdown()) ================================================ FILE: benchmark/plot_graph.py ================================================ """Plot the graph of the benchmark results.""" import argparse import hashlib import os from typing import TypeVar import matplotlib.pyplot as plt import pandas as pd import pyperf import xxhash import mmh3 T = TypeVar("T") def pad_with_nan(data: dict[T, list[float]]) -> dict[T, list[float]]: """Pad the data with NaN values to make the length of all lists equal. Args: data: The data to pad. Returns: The padded data. """ max_len = max(len(v) for v in data.values()) for k, v in data.items(): data[k] = v + [float("nan")] * (max_len - len(v)) return data def ordered_intersection(list1: list[T], list2: list[T]) -> list[T]: """Return the intersection of two lists in the order of the first list. Args: list1: The first list. list2: The second list. Returns: The intersection of the two lists in the order of the first list. 
""" return [item for item in list1 if item in list2] DIGEST_SIZES = { "mmh3_base_hash": mmh3.mmh3_32().digest_size, "mmh3_32": mmh3.mmh3_32().digest_size, "mmh3_128": mmh3.mmh3_x64_128().digest_size, "xxh_32": xxhash.xxh32().digest_size, "xxh_64": xxhash.xxh64().digest_size, "xxh3_64": xxhash.xxh3_64().digest_size, "xxh3_128": xxhash.xxh3_128().digest_size, "md5": hashlib.md5().digest_size, "sha1": hashlib.sha1().digest_size, "pymmh3_32": mmh3.mmh3_32().digest_size, "pymmh3_128": mmh3.mmh3_x64_128().digest_size, } THROUGHPUT_FILE_NAME = "throughput.png" THROUGHPUT_SMALL_FILE_NAME = "throughput_small.png" LATENCY_FILE_NAME = "latency.png" LATENCY_SMALL_FILE_NAME = "latency_small.png" if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--output-dir", required=True) parser.add_argument("filenames", nargs="+") args = parser.parse_args() result_latency: dict[str, list[float]] = {} result_throughput: dict[str, list[float]] = {} index: list[int] = [] for file_name in args.filenames: suite = pyperf.BenchmarkSuite.load(file_name) base_name = os.path.basename(file_name) hash_name = os.path.splitext(base_name)[0] result_throughput[hash_name] = [] result_latency[hash_name] = [] index = [] for bench_name in suite.get_benchmark_names(): bench = suite.get_benchmark(bench_name) data_size = int(bench_name.split(" ")[0]) index.append(data_size) latency_seconds = bench.median() result_throughput[hash_name].append( DIGEST_SIZES[hash_name] / latency_seconds ) result_latency[hash_name].append(latency_seconds) result_throughput = pad_with_nan(result_throughput) result_latency = pad_with_nan(result_latency) ordered_hash_names = ordered_intersection( list(DIGEST_SIZES.keys()), list(result_throughput.keys()) ) df_throughput = pd.DataFrame(result_throughput, index=index) df_throughput = df_throughput[ordered_hash_names] df_latency = pd.DataFrame(result_latency, index=index) df_latency = df_latency[ordered_hash_names] plt.rcParams["figure.dpi"] = 72 * 3 
# pylint: disable=R0801
"""An ad-hoc script to plot the graph of the benchmark results for mmh3.hash.

This file should be incorporated into the main plot module in the future.
"""

import argparse
import os
from typing import TypeVar

import matplotlib.pyplot as plt
import pandas as pd
import pyperf

import mmh3

T = TypeVar("T")


def pad_with_nan(data: dict[T, list[float]]) -> dict[T, list[float]]:
    """Pad the data with NaN values to make the length of all lists equal.

    Args:
        data: The data to pad; its lists are replaced in place.

    Returns:
        The padded data (the same dict object).
    """
    longest = max(len(values) for values in data.values())
    for key, values in data.items():
        data[key] = values + [float("nan")] * (longest - len(values))
    return data


def ordered_intersection(list1: list[T], list2: list[T]) -> list[T]:
    """Return the intersection of two lists in the order of the first list.

    Args:
        list1: The first list, which determines the output order.
        list2: The second list.

    Returns:
        The intersection of the two lists in the order of the first list.
    """
    return [element for element in list1 if element in list2]


# All three benchmarked variants are 32-bit hashes (4-byte digests).
DIGEST_SIZES = {
    "mmh3_base_hash_500": mmh3.mmh3_32().digest_size,
    "mmh3_base_hash_410": mmh3.mmh3_32().digest_size,
    "mmh3_32_500": mmh3.mmh3_32().digest_size,
}

LATENCY_FILE_NAME = "latency_hash.png"

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--output-dir", required=True)
    parser.add_argument("filenames", nargs="+")
    args = parser.parse_args()

    # One latency series per input file; the file stem names the series.
    result_latency: dict[str, list[float]] = {}
    index: list[int] = []
    for path in args.filenames:
        suite = pyperf.BenchmarkSuite.load(path)
        hash_name = os.path.splitext(os.path.basename(path))[0]
        result_latency[hash_name] = []
        index = []
        for bench_name in suite.get_benchmark_names():
            bench = suite.get_benchmark(bench_name)
            # Benchmark names look like "<size> bytes".
            index.append(int(bench_name.split(" ")[0]))
            result_latency[hash_name].append(bench.median())

    result_latency = pad_with_nan(result_latency)
    ordered_hash_names = ordered_intersection(
        list(DIGEST_SIZES.keys()), list(result_latency.keys())
    )
    df_latency = pd.DataFrame(result_latency, index=index)
    df_latency = df_latency[ordered_hash_names]

    plt.rcParams["figure.dpi"] = 72 * 3
    plt.figure()

    # Latency in nanoseconds, comparing hash() across versions; the raw
    # mmh3_32 series is excluded from the plot.
    df_latency_small = df_latency * 1000 * 1000 * 1000
    df_latency_small = df_latency_small.drop(columns=["mmh3_32_500"])
    df_latency_small = df_latency_small.rename(
        columns={
            "mmh3_base_hash_410": "hash() in mmh3 4.1.0",
            "mmh3_base_hash_500": "hash() in mmh3 5.0.0",
        }
    )
    df_latency_small = df_latency_small[df_latency_small.index <= 2**12]
    df_latency_small.plot(xlabel="Input size (bytes)", ylabel="Latency (ns)")
    plt.savefig(os.path.join(args.output_dir, LATENCY_FILE_NAME))

    plt.close("all")
df_latency_small.plot(xlabel="Input size (bytes)", ylabel="Latency (ns)") plt.savefig(os.path.join(args.output_dir, LATENCY_FILE_NAME)) plt.close("all") ================================================ FILE: docs/CODE_OF_CONDUCT.md ================================================ # Code of Conduct Contributors to this project are expected to follow the ACM Code of Ethics and Professional Conduct, available at [https://www.acm.org/code-of-ethics](https://www.acm.org/code-of-ethics). The current version of the Code and its guidelines was adopted by the ACM Council on June 22, 2018. ================================================ FILE: docs/CONTRIBUTING.md ================================================ # Contributing Thank you for your interest in contributing to the `mmh3` project. We appreciate your support and look forward to your contributions. Please read [README](https://github.com/hajimes/mmh3/blob/master/README.md) to get an overview of the `mmh3` project, and follow our [Code of Conduct](./CODE_OF_CONDUCT) (ACM Code of Ethics and Professional Conduct). ## Submitting issues We welcome your contributions, whether it's submitting a bug report or suggesting a new feature through the [issue tracker](https://github.com/hajimes/mmh3/issues). Before creating a new issue, please check the [Frequently Asked Questions section in README](https://github.com/hajimes/mmh3#frequently-asked-questions) to see if the problem has already been noted. ## Project structure As of version 5.1.0, the project layout is structured as follows: - `src/mmh3` - `mmh3module.c`: the main file that serves as the interface between Python and the MurmurHash3 c implementations. - `murmurhash.c`: implementations of the MurmurHash3 family. Auto-generated from Austin Appleby's original code. DO NOT edit this file manually. See [README in the util directory](https://github.com/hajimes/mmh3/blob/master/util/README.md) for details. - `murmurhash.h`: headers and macros for MurmurHash3. 
Auto-generated from `util/refresh.py`. DO NOT edit this file manually. - `hashlib.h`: taken from [CPython's code base](https://github.com/python/cpython/blob/9ce0f48e918860ffa32751a85b0fe7967723e2e3/Modules/hashlib.h). - `util` - `refresh.py`: file that generates `src/mmh3/murmurhash3.c` and `src/mmh3/murmurhash3.h` from the original MurmurHash3 C++ code. Edit this file to modify the contents of these files. - `benchmark` - `benchmark.py`: script to run benchmarks. - `plot_graph.py`: script to plot benchmark results. - `docs`: project documentation directory - `paper`: directory containing the academic paper for this project - `.github/workflows`: GitHub Actions workflows ## Project setup Run: ```shell git clone https://github.com/hajimes/mmh3.git ``` This project uses `tox-uv` to automate testing and other tasks. You can install `tox-uv` by running: ```shell pipx install uv uv tool install tox --with tox-uv ``` In addition, `npx` (included with `npm` >= 5.2.0) is required within the `tox` environments to run linters. ## Testing and linting Before submitting your changes, make sure to run the project's tests to ensure everything is working as expected. To run all tests, use the following command: ```shell tox ``` During development, you can run the tests for a specific environment by specifying the environment name. For example, to run tests for a specific version of Python (e.g., Python 3.13), use: ```shell tox -e py313 ``` For type checking, run: ```shell tox -e type ``` To run linters with automated formatting, use: ```shell tox -e lint ``` ### (Optional) Testing on s390x When you have modified the code in a way which may cause endian issues, you may want to locally test on s390x, the only big-endian platform officially supported by Python. [_Emulating a big-endian s390x with QEMU_](https://til.simonwillison.net/docker/emulate-s390x-with-qemu) by Simon Willison is a good introduction to Docker/QEMU settings for emulating s390x.
If the above does not work, you may also want to try the following: ```shell docker run --rm --privileged tonistiigi/binfmt --install all docker buildx create --name mybuilder --use docker run -it multiarch/ubuntu-core:s390x-focal /bin/bash ``` ## Pull request Once you've pushed your changes to your fork, you can [create a pull request (PR)](https://github.com/hajimes/mmh3/pulls) on the main project repository. Please provide a clear and detailed description of your changes in the PR, and reference any related issues. ## util directory ### Algorithm implementations used by the `mmh3` module The `util` directory contains C files that were generated from the [SMHasher](https://github.com/aappleby/smhasher) C++ project by Austin Appleby. The idea of the subproject directory loosely follows the [`hashlib` implementation of CPython](https://github.com/python/cpython/tree/main/Modules/_hacl). ### Updating mmh3 core C code Run `tox -e build_cfiles`. This will fetch Appleby's original SMHasher project as a git submodule and then generate PEP 7-compliant C code from the original project. To perform further edits, add transformation code to the `refresh.py` script, instead of editing `murmurhash3.*` files manually. Then, run `tox -e build_cfiles` again to update the `murmurhash3.*` files. ### Local files 1. `./util/README.md` 1. `./util/refresh.py` 1. `./util/FILE_HEADER` ### Generated files 1. `./src/mmh3/murmurhash3.c` 1. `./src/mmh3/murmurhash3.h` ## Benchmarking To run benchmarks locally, try the following command: ```shell tox -e benchmark -- -o OUTPUT_FILE \ --test-hash HASH_NAME --test-buffer-size-max HASH_SIZE ``` where `OUTPUT_FILE` is the output file name (json formatted), `HASH_NAME` is the name of the hash, and `HASH_SIZE` is the maximum buffer size to be tested in bytes. 
For example, ```shell mkdir -p _results tox -e benchmark -- -o _results/mmh3_128.json \ --test-hash mmh3_128 --test-buffer-size-max 262144 ``` As of version 5.1.0, the following hash function identifiers are available for benchmarking: `mmh3_32`, `mmh3_128`, `xxh_32`, `xxh_64`, `xxh3_64`, `xxh3_128`, `pymmh3_32`, `pymmh3_128`, `md5`, and `sha1`. The owner of the repository can run the benchmark on GitHub Actions by using the workflow defined in `.github/workflows/benchmark.yml`. After obtaining the benchmark results, you can plot graphs with `plot_graph.py`. The following is an example of how to run the script: ```shell tox -e plot -- --output-dir docs/_static RESULT_DIR/*.json ``` where `RESULT_DIR` is the directory containing the benchmark results. The names of json files should be in the format of `HASH_IDENTIFIER.json`, e.g., `mmh3_128.json`. ## Documentation Project documentation files are mainly written in the Markdown format and are located in the `docs` directory. The documentation is automatically built and [hosted on the Read the Docs](https://mmh3.readthedocs.io/en/latest/). To build the documentation locally, use the following command: ```shell tox -e docs ``` To check the result of the built documentation, open `docs/_build/html/index.html` in your browser.
- [Micha Gorelick](https://github.com/mynameisfiber), [#1](https://github.com/hajimes/mmh3/pull/1). - [Danil Shein](https://github.com/dshein-alt), [#40](https://github.com/hajimes/mmh3/pull/40). - [Derek Wilson](https://github.com/underrun), [#2](https://github.com/hajimes/mmh3/pull/2), [#3](https://github.com/hajimes/mmh3/pull/3). - [Dimitri Vorona](https://github.com/alendit), [#13](https://github.com/hajimes/mmh3/pull/13). - [@doozr](https://github.com/doozr), [#15](https://github.com/hajimes/mmh3/pull/15). - [Dušan Nikolić](https://github.com/n-dusan), [#37](https://github.com/hajimes/mmh3/pull/37). - [Matthew Honnibal](https://github.com/honnibal), [#22](https://github.com/hajimes/mmh3/pull/22). - [wouter bolsterlee](https://github.com/wbolster), [#35](https://github.com/hajimes/mmh3/pull/35). ## Community Contributors We would also like to thank the following contributors for their valuable bug reports, feature suggestions, and other contributions: - [Antoine Pitrou](https://github.com/pitrou), [#10](https://github.com/hajimes/mmh3/issues/10). - [Benjamin Bengfort](https://github.com/bbengfort), [#46](https://github.com/hajimes/mmh3/issues/46). - [Christian von Schultz](https://github.com/vonschultz), [#50](https://github.com/hajimes/mmh3/issues/50). - [Dan Blanchard](https://github.com/dan-blanchard), [#8](https://github.com/hajimes/mmh3/issues/8). - [Heather Lapointe](https://github.com/Alphadelta14), [#25](https://github.com/hajimes/mmh3/issues/25). - [Jacques Dark](https://github.com/jqdark), [#12](https://github.com/hajimes/mmh3/issues/12). - [Matej Spiller Muys](https://github.com/matejsp), [#90](https://github.com/hajimes/mmh3/issues/90). - [Niklas Semmler](https://github.com/niklassemmler), [#7](https://github.com/hajimes/mmh3/issues/7). - [Ryan](https://github.com/ryanfwy), [#25](https://github.com/hajimes/mmh3/issues/25). - [Sebastian Kreft](https://github.com/sk-), [#17](https://github.com/hajimes/mmh3/issues/17). 
- [Tom Mitchell](https://github.com/tcmitchell), [#51](https://github.com/hajimes/mmh3/issues/51). - [Varunkumar Nagarajan](https://github.com/varunkumar), [#39](https://github.com/hajimes/mmh3/issues/39). - [@xqdd](https://github.com/xqdd), [#9](https://github.com/hajimes/mmh3/issues/9). - [@yzssbo](https://github.com/yzssbo), [#25](https://github.com/hajimes/mmh3/issues/25). ## Paper Editors and Reviewers We extend our heartfelt thanks to the following editors and reviewers of the [_Journal of Open Source Software_](https://joss.theoj.org) (JOSS), whose feedback greatly enhanced this project: - [Daniel S. Katz](https://github.com/danielskatz) (Managing Editor-in-Chief) - [Vince Knight](https://github.com/drvinceknight) (Editor) - [Marek Šuppa](https://github.com/mrshu) (Reviewer) - [Jules Pénuchot](https://github.com/JPenuchot) (Reviewer) - [Gaëtan Cassiers](https://github.com/cassiersg) (Reviewer) ================================================ FILE: docs/Makefile ================================================ # Minimal makefile for Sphinx documentation # # You can set these variables from the command line, and also # from the environment for the first two. SPHINXOPTS ?= SPHINXBUILD ?= sphinx-build SOURCEDIR = . BUILDDIR = _build # Put it first so that "make" without argument is like "make help". help: @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) .PHONY: help Makefile # Catch-all target: route all unknown targets to Sphinx using the new # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). %: Makefile @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) ================================================ FILE: docs/api.md ================================================ # API Reference The MurmurHash3 algorithm has three variants: - MurmurHash3_x86_32: Generates 32-bit hashes using 32-bit arithmetic. - MurmurHash3_x64_128: Generates 128-bit hashes using 64-bit arithmetic. 
- MurmurHash3_x86_128: Generates 128-bit hashes using 32-bit arithmetic. The `mmh3` library provides functions and classes for each variant. Although this API reference is comprehensive, you may find the following functions particularly useful: - [mmh3.hash()](#mmh3.hash): Uses the 32-bit variant as its backend and accepts `bytes` or `str` as input (strings are UTF-8 encoded). This function is slower than the x64_128 variant in 64-bit environments but is portable across different architectures. It can also be used to calculate favicon hash footprints for platforms like [Shodan](https://www.shodan.io) and [ZoomEye](https://www.zoomeye.hk). - [mmh3.mmh3_x64_128_digest()](#mmh3.mmh3_x64_128_digest): Uses the x64_128 variant as its backend. This function accepts a buffer (e.g., `bytes`, `bytearray`, `memoryview`, and `numpy` arrays) and returns a 128-bit hash as a `bytes` object, similar to the `hashlib` module in the Python Standard Library. It performs faster than the 32-bit variant on 64-bit machines. Note that **`mmh3` is endian-neutral**, while the original C++ library is endian-sensitive (see also [Frequently Asked Questions](https://github.com/hajimes/mmh3#frequently-asked-questions)). This feature of `mmh3` is essential when portability across different architectures is required, such as when calculating hash footprints for web services. ```{caution} [Buffer-accepting hash functions](#buffer-accepting-hash-functions) (except the deprecated `hash_from_buffer`) accept positional-arguments only. Using keyword arguments will raise a `TypeError`. ``` ```{note} Support for no-GIL mode (officially introduced in Python 3.14) was added in version 5.2.0. - Basic hash functions are inherently thread-safe by design. - Buffer-accepting hash functions are thread-safe, **provided the input buffer is thread-safe**. - Hasher classes are thread-safe, again **assuming the input buffer is thread-safe**. 
However, thread safety under the no-GIL variant has not yet been fully tested as of 5.2.0. If you encounter any issues, please report them via the [issue tracker](https://github.com/hajimes/mmh3/issues). ``` ## Basic Hash Functions The following functions are used to hash immutable types, specifically `bytes` and `str`. String inputs are automatically converted to `bytes` using UTF-8 encoding before hashing. Although `hash128()`, `hash64()`, and `mmh3.hash_bytes()` are provided for compatibility with previous versions and are not marked for deprecation, the [buffer-accepting hash functions](#buffer-accepting-hash-functions) introduced in version 5.1.0 are recommended for new code. ```{eval-rst} .. autofunction:: mmh3.hash .. autofunction:: mmh3.hash128 .. autofunction:: mmh3.hash64 .. autofunction:: mmh3.hash_bytes ``` ## Buffer-Accepting Hash Functions The following functions are used to hash types that implement the buffer protocol such as `bytes`, `bytearray`, `memoryview`, and `numpy` arrays. ```{seealso} The buffer protocol, [originally implemented as a part of Python/C API](https://docs.python.org/3/c-api/buffer.html), was formally defined as a Python-level API in [PEP 688](https://peps.python.org/pep-0688/) in 2022 and its corresponding type hint was introduced as [collections.abc.Buffer](https://docs.python.org/3/library/collections.abc.html#collections.abc.Buffer) in Python 3.12. For earlier Python versions, `mmh3` uses a type alias for the type hint [\_typeshed.ReadableBuffer](https://github.com/python/typeshed/blob/d326c9bd424ad60c2b63c2ca1c5c1006c61c3562/stdlib/_typeshed/__init__.pyi#L281), which is itself an alias for [typing_extensions.Buffer](https://typing-extensions.readthedocs.io/en/stable/#typing_extensions.Buffer), the backported type hint for `collections.abc.Buffer`. ``` ```{eval-rst} .. autofunction:: mmh3.hash_from_buffer .. autofunction:: mmh3.mmh3_32_digest .. autofunction:: mmh3.mmh3_32_sintdigest .. 
autofunction:: mmh3.mmh3_32_uintdigest .. autofunction:: mmh3.mmh3_x64_128_digest .. autofunction:: mmh3.mmh3_x64_128_sintdigest .. autofunction:: mmh3.mmh3_x64_128_stupledigest .. autofunction:: mmh3.mmh3_x64_128_uintdigest .. autofunction:: mmh3.mmh3_x64_128_utupledigest .. autofunction:: mmh3.mmh3_x86_128_digest .. autofunction:: mmh3.mmh3_x86_128_sintdigest .. autofunction:: mmh3.mmh3_x86_128_stupledigest .. autofunction:: mmh3.mmh3_x86_128_uintdigest .. autofunction:: mmh3.mmh3_x86_128_utupledigest ``` ## Hasher Classes `mmh3` implements hashers with interfaces similar to those in `hashlib` from the standard library: `mmh3_32()` for 32-bit hashing, `mmh3_x64_128()` for 128-bit hashing optimized for x64 architectures, and `mmh3_x86_128()` for 128-bit hashing optimized for x86 architectures. In addition to the standard `digest()` method, each hasher provides `sintdigest()`, which returns a signed integer, and `uintdigest()`, which returns an unsigned integer. The 128-bit hashers also include `stupledigest()` and `utupledigest()`, which return two 64 bit integers. Please note that as of version 5.0.0, the implementation is still experimental, and performance may be unsatisfactory (particularly `mmh3_x86_128()`). Additionally, `hexdigest()` is not supported; use `digest().hex()` instead. ```pycon >>> import mmh3 >>> hasher = mmh3.mmh3_x64_128(b"foo", 42) # seed=42 >>> hasher.update(b"bar") >>> hasher.digest() b'\x82_n\xdd \xac\xb6j\xef\x99\xb1e\xc4\n\xc9\xfd' >>> hasher.sintdigest() # 128 bit signed int -2943813934500665152301506963178627198 >>> hasher.uintdigest() # 128 bit unsigned int 337338552986437798311073100468589584258 >>> hasher.stupledigest() # two 64 bit signed ints (7689522670935629698, -159584473158936081) >>> hasher.utupledigest() # two 64 bit unsigned ints (7689522670935629698, 18287159600550615535) ``` ```{eval-rst} .. autoclass:: mmh3.mmh3_32 :members: ``` ```{eval-rst} .. autoclass:: mmh3.mmh3_x64_128 :members: ``` ```{eval-rst} .. 
autoclass:: mmh3.mmh3_x86_128 :members: ``` ================================================ FILE: docs/benchmark.md ================================================ # Benchmarks ## Settings ### Machine - Ubuntu 22.04 instance on GitHub Actions - The benchmarking suites are implemented as GitHub Actions workflows. - [4 processors, 16 GB RAM, 14 GB storage (SSD)](https://docs.github.com/en/actions/using-github-hosted-runners/using-github-hosted-runners/about-github-hosted-runners#standard-github-hosted-runners-for-public-repositories) - According to profiling with `pyperf`, each processor operates at a frequency between 2.4 and 3.3 GHz. - Tuning by the following settings: - All tests in a benchmarking suite are executed within the same GitHub Actions job. For more details, refer to [Rodríguez-Guerra (2021)](https://labs.quansight.org/blog/2021/08/github-actions-benchmarks). - [CPU pinning](https://manuel.bernhardt.io/posts/2023-11-16-core-pinning/) to isolate the benchmarking process. - See the [documentation of pyperf](https://pyperf.readthedocs.io/en/latest/system.html) for more details on the following settings: - Stop `irqbalance`. - Set `/proc/irq/default_smp_affinity` to `3` (CPU 0 and 1), where the benchmarking processes are pinned to CPU 2 and 3. - Set `/proc/sys/kernel/perf_event_max_sample_rate` to `1`. - `/proc/sys/kernel/randomize_va_space` = 2 (default) ### Software - Python environment: - CPython 3.12.5 (64-bit) - Hash libraries: - mmh3 5.0.0-dev - [python-xxhash](https://github.com/ifduyue/python-xxhash) 3.5.0 - [hashlib](https://docs.python.org/3/library/hashlib.html) (Standard library) - `md5` is tested for `lambda x: hashlib.md5(x).digest()`, and so is `sha1`. Therefore, the results for these functions include the overhead of creating the hash object and a function call.
- Benchmarking library: - [pyperf](https://github.com/psf/pyperf) 2.7.0 - Used the [bench_time_func](https://pyperf.readthedocs.io/en/latest/api.html#Runner.bench_time_func) interface to eliminate the overhead of the function call. - Processed times are measured by [time.perf_counter()](https://docs.python.org/3/library/time.html#time.perf_counter) in nanoseconds. ## Method - A benchmarking test is performed for each specified byte size, which is derived from the Fibonacci sequence. - For each input size, the test generates a set of 10 `bytes` instances, where each instance's size is pseudo-randomly selected from the range `[ceil(input * 0.9), floor(input * 1.1)]`. - This randomization is crucial as it increases the difficulty of branch predictions, creating a more realistic scenario. For further details, see [xxHash: Performance comparison](https://github.com/Cyan4973/xxHash/wiki/Performance-comparison#throughput-on-small-data-of-random-length-1-n). - This inner loop of 10 iterations is repeated for a certain number of cycles, referred to as the outer loop, which is auto-calibrated by `pyperf`. - To avoid the overhead during the loop, iterators are pre-generated using `itertools.repeat()` outside the loop. See [Peters (2002)](https://www.oreilly.com/library/view/python-cookbook/0596001673/ch17.html) and the real code of the `timeit` module in the Python Standard Library. - The final results are measured using the median, as it is more robust than the mean, especially on untuned or unstable environments such as GitHub Actions. For more details, see [pyperf: Analyze benchmark results](https://pyperf.readthedocs.io/en/latest/analyze.html). ## Results The resulting graphs are plotted using the `pandas` and `matplotlib` libraries.
### Comparison of Version Improvements JSON files containing the benchmark results are available at: [hajimes/mmh3-benchmarks/results_basic-hash/2024-09-17_6bb9987](https://github.com/hajimes/mmh3-benchmarks/tree/main/results_basic-hash/2024-09-17_6bb9987) ```{figure} _static/latency_hash.png :alt: Latency for hash() in version 4.1.0 and 5.0.0. :align: center Figure 1: Latency for `mmh3.hash()` in version 4.1.0 and 5.0.0. Smaller is better. ``` ### Comparison of Hash Functions Across Libraries JSON files containing the benchmark results are available at: [hajimes/mmh3-benchmarks/results/2024-09-17_30da46e](https://github.com/hajimes/mmh3-benchmarks/tree/main/results/2024-09-17_30da46e) In the following graphs: - `mmh3_32` refers to `mmh3.mmh3_32_digest()`. 32-bit output using 32-bit arithmetic. Developed in 2011. - `mmh3_128` refers to `mmh3.mmh3_x64_128_digest()`. 128-bit output using 64-bit arithmetic. Developed in 2011. - `xxh_32` refers to `xxhash.xxh32_digest()`. 32-bit output using 32-bit arithmetic. Developed in 2014. - `xxh_64` refers to `xxhash.xxh64_digest()`. 64-bit output using 64-bit arithmetic. Developed in 2014. - `xxh3_64` refers to `xxhash.xxh3_64_digest()`. 64-bit output using vectorized arithmetic. Developed in 2020. - `xxh3_128` refers to `xxhash.xxh3_128_digest()`. 128-bit output using vectorized arithmetic. Developed in 2020. - `md5` refers to `hashlib.md5()`. 128-bit output using a cryptographic algorithm. Developed in 1992. - `sha1` refers to `hashlib.sha1()`. 160-bit output using a cryptographic algorithm. Developed in 1995. ```{figure} _static/latency_small.png :alt: Latency for small data :align: center Figure 2: Latency for small data. Smaller is better. ``` ```{figure} _static/latency.png :alt: Latency for large data :align: center Figure 3: Latency for large data. Smaller is better. ``` The following graphs show the throughput, measured as the size of hash output generated per second by each function.
```{figure} _static/throughput_small.png :alt: Throughput for small data :align: center Figure 4: Throughput for small data. Larger is better. ``` ```{figure} _static/throughput.png :alt: Throughput for large data :align: center Figure 5: Throughput for large data. Larger is better. The y-axis is logscale. ``` ## Concluding Remarks Version 5.0.0 of the `mmh3` library has improved the performance of the `hash()` function and other new functions by adopting [METH_FASTCALL](https://docs.python.org/3/c-api/structures.html#c.METH_FASTCALL). This enhancement reduces the overhead of function calls. For data sizes between 1–2 KB (such as 48x48 favicons), performance has improved by 10%–20%. For smaller data (~500 bytes, like 16x16 favicons), performance increases by approximately 30%. However, the performance gain from this revision remains constant, meaning the relative improvement diminishes as data size increases. When comparing hash functions across libraries, `mmh3 5.0.0` is the most performant for small data sizes, while the `xxh3` families in `xxhash 3.5.0` excel with larger data. This is largely due to the new version of `mmh3` utilizing `METH_FASTCALL`, which reduces the overhead of function calls. However, `xxhash` may adopt the same interface in the future, potentially making this advantage temporary. To further improve `mmh3` performance, the core algorithm itself would need an overhaul. Overall, these benchmarking results serve as a useful reference when selecting a hash function for your application, and they provide a solid foundation for future performance enhancements to our library. ## References - Python Standard Library. [timeit](https://docs.python.org/3/library/timeit.html). - [pyperf: Analyze benchmark results](https://pyperf.readthedocs.io/en/latest/analyze.html). - [pyperf: Tune the system for benchmarks](https://pyperf.readthedocs.io/en/latest/system.html). 
- [pyperf issues #1: Use a better measures than average and standard deviation #1](https://github.com/psf/pyperf/issues/1). - [pyperf issues #75: Reconsidering min()?](https://github.com/psf/pyperf/issues/75). - [pytest-benchmark: Usage](https://pytest-benchmark.readthedocs.io/en/latest/usage.html). - [xxHash: Performance comparison](https://github.com/Cyan4973/xxHash/wiki/Performance-comparison). - [xxHash benchmark program](https://github.com/Cyan4973/xxHash/tree/release/tests/bench). - Manuel Bernhardt. 2023. [On pinning and isolating CPU cores](https://manuel.bernhardt.io/posts/2023-11-16-core-pinning/). - Micha Gorelick and Ian Ozsvald. 2020. [High Performance Python: Practical Performant Programming for Humans, 2nd ed](https://www.oreilly.com/library/view/high-performance-python/9781492055013/). O'Reilly Media. ISBN: 978-1492055020. Chapter 2. - Tim Peters. 2002. [Chapter 17. Algorithms: Introduction](https://www.oreilly.com/library/view/python-cookbook/0596001673/ch17.html) in _Python Cookbook_, 3rd ed. O'Reilly Media. ISBN: 978-0596001674. - Jaime Rodríguez-Guerra. 2021. [Is GitHub Actions suitable for running benchmarks?](https://labs.quansight.org/blog/2021/08/github-actions-benchmarks). - Victor Stinner. 2016. [My journey to stable benchmark, part 1 (system)](https://vstinner.github.io/journey-to-stable-benchmark-system.html). - Victor Stinner. 2016. [My journey to stable benchmark, part 3 (average)](https://vstinner.github.io/journey-to-stable-benchmark-average.html). ================================================ FILE: docs/changelog.md ================================================ ```{include} ../CHANGELOG.md ``` ================================================ FILE: docs/conf.py ================================================ # pylint: disable=C0114,C0103 # Configuration file for the Sphinx documentation builder. 
# # For the full list of built-in configuration values, see the documentation: # https://www.sphinx-doc.org/en/master/usage/configuration.html # import os # import sys # -- Project information ----------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information project = "mmh3" project_copyright = "2011-2025, Hajime Senuma" author = "Hajime Senuma" # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration extensions = [ "sphinx.ext.autodoc", "sphinx.ext.napoleon", "sphinx_copybutton", "myst_parser", ] templates_path = ["_templates"] exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] # -- Options for HTML output ------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output html_theme = "shibuya" html_static_path = ["_static"] html_theme_options = { "github_url": "https://github.com/hajimes/mmh3", } # -- Options for autodoc ------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/extensions/autodoc.html autodoc_member_order = "groupwise" myst_heading_anchors = 3 ================================================ FILE: docs/index.rst ================================================ mmh3 documentation ================== mmh3 is a Python extension for `MurmurHash (MurmurHash3) `_, a set of fast and robust non-cryptographic hash functions invented by Austin Appleby. .. toctree:: :maxdepth: 2 :caption: User Guideline Quickstart api benchmark Changelog CONTRIBUTORS .. 
toctree:: :maxdepth: 2 :caption: Project documentation CONTRIBUTING CODE_OF_CONDUCT Indices and tables ================== * :ref:`genindex` * :ref:`search` ================================================ FILE: docs/make.bat ================================================ @ECHO OFF pushd %~dp0 REM Command file for Sphinx documentation if "%SPHINXBUILD%" == "" ( set SPHINXBUILD=sphinx-build ) set SOURCEDIR=. set BUILDDIR=_build %SPHINXBUILD% >NUL 2>NUL if errorlevel 9009 ( echo. echo.The 'sphinx-build' command was not found. Make sure you have Sphinx echo.installed, then set the SPHINXBUILD environment variable to point echo.to the full path of the 'sphinx-build' executable. Alternatively you echo.may add the Sphinx directory to PATH. echo. echo.If you don't have Sphinx installed, grab it from echo.https://www.sphinx-doc.org/ exit /b 1 ) if "%1" == "" goto help %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% goto end :help %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% :end popd ================================================ FILE: docs/quickstart.md ================================================ ```{include} ../README.md ``` ================================================ FILE: paper/paper.bib ================================================ @article{adja_blockchain-based_2021, title = {A blockchain-based certificate revocation management and status verification system}, author = {Adja, Yves Christian Elloh and Hammi, Badis and Serhrouchni, Ahmed and Zeadally, Sherali}, year = 2021, journal = {Computers \& Security}, volume = 104, pages = 102209, doi = {10.1016/j.cose.2021.102209}, issn = {0167-4048}, url = {https://www.sciencedirect.com/science/article/pii/S016740482100033X}, keywords = {Authentication, Blockchain, Bloom filter, Certificate, Decentralization, PKI, Revocation, Security, X509} } @misc{appleby_murmurhash3_2011, title = {{MurmurHash3} and {SMHasher}}, author = {Appleby, Austin}, year = 2011, url = 
{https://github.com/aappleby/smhasher} } @misc{Bernhardt2023, title = {On pinning and isolating CPU cores}, author = {Bernhardt, Manuel}, year = 2023, url = {https://manuel.bernhardt.io/posts/2023-11-16-core-pinning/} } @article{Bloom1970, title = {Space/Time Trade-Offs in Hash Coding with Allowable Errors}, author = {Bloom, Burton H.}, year = 1970, month = {jul}, journal = {Commun. ACM}, publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, volume = 13, number = 7, pages = {422–426}, doi = {10.1145/362686.362692}, issn = {0001-0782}, url = {https://doi.org/10.1145/362686.362692}, issue_date = {July 1970}, numpages = 5, keywords = {retrieval efficiency, storage efficiency, hash addressing, scatter storage, searching, storage layout, retrieval trade-offs, hash coding} } @inproceedings{Broder1997a, title = {On the resemblance and containment of documents}, author = {Broder, Andrei Z.}, year = 1997, booktitle = {Proceedings. Compression and Complexity of {SEQUENCES} 1997}, publisher = {IEEE Comput. 
Soc}, pages = {21--29}, doi = {10.1109/SEQUEN.1997.666900}, isbn = {0-8186-8132-2}, url = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=666900}, note = {ISSN: 0818681322}, keywords = {MinHash} } @misc{collet_xxhash_2014, title = {{xxHash}}, author = {Collet, Yan}, year = 2014, url = {https://github.com/Cyan4973/xxHash} } @misc{collet_xxhash_comparison_2020, title = {{xxHash}: Performance comparison (2020)}, author = {Collet, Yan}, year = 2020, url = {https://github.com/Cyan4973/xxHash/wiki/Performance-comparison} } @misc{du_xxhash_2014, title = {{xxhash}}, author = {Du, Yue}, year = 2014, url = {https://github.com/ifduyue/python-xxhash} } @misc{faraday_security_understanding_2022, title = {Understanding {Spring4Shell}}, author = {{Faraday Security}}, year = 2022, month = jul, url = {https://faradaysec.com/understanding-spring4shell/} } @book{gorelick_high_2020, title = {High Performance {P}ython: Practical Performant Programming for Humans}, author = {Gorelick, Micha and Ozsvald, Ian}, year = 2020, month = jun, publisher = {O'Reilly Media}, isbn = {978-1-4920-5502-0}, edition = {2nd edition} } @software{hugo_van_kemenade_2024_13624792, author = {Van Kemenade, Hugo and Si, Richard and Dollenstein, Zsolt}, title = {hugovk/top-pypi-packages: Release 2024.09}, month = sep, year = 2024, publisher = {Zenodo}, version = {2024.09}, doi = {10.5281/zenodo.13624792}, url = {https://doi.org/10.5281/zenodo.13624792} } @inproceedings{kakwani_indicnlpsuite_2020, title = {{IndicNLPSuite}: Monolingual Corpora, Evaluation Benchmarks and Pre-trained Multilingual Language Models for {I}ndian Languages}, author = {Kakwani, Divyanshu and Kunchukuttan, Anoop and Golla, Satish and N.C., Gokul and Bhattacharyya, Avik and Khapra, Mitesh M. 
and Kumar, Pratyush}, year = 2020, month = nov, booktitle = {Findings of the {A}ssociation for {C}omputational {L}inguistics: {EMNLP} 2020}, publisher = {Association for Computational Linguistics}, address = {Online}, pages = {4948--4961}, doi = {10.18653/v1/2020.findings-emnlp.445}, url = {https://aclanthology.org/2020.findings-emnlp.445} } @misc{kihlander_pymmh3_2013, title = {{PYMMH3}}, author = {Kihlander, Fredrik and Gusani, Swapnil}, year = 2013, url = {https://github.com/wc-duck/pymmh3} } @techreport{kopriva_hunting_2021, title = {Hunting phishing websites with favicon hashes}, author = {Kopriva, Jan}, year = 2021, month = apr, url = {https://isc.sans.edu/diary/27326}, institution = {SANS Internet Storm Center} } @book{kumar_probabilistic_2021, title = {Probabilistic Data Structures for Blockchain-Based {I}nternet of {T}hings Applications}, author = {Kumar, Neeraj and Miglani, Arzoo}, year = 2021, month = jan, publisher = {CRC Press}, doi = {10.1201/9781003080046}, isbn = {978-0-367-52990-1} } @book{medjedovic_algorithms_2022, title = {Algorithms and Data Structures for Massive Datasets}, author = {Medjedovic, Dzejla and Tahirovic, Emin and Dedovic, Ines}, year = 2022, month = jul, publisher = {Manning}, isbn = {978-1-61729-803-5} } @techreport{Matherly2017, title = {Complete Guide to Shodan: Collect. Analyze. Visualize. 
Make Internet Intelligence Work for You.}, author = {Matherly, John}, year = 2017, edition = {Version 2017-08-23}, institution = {Shodan} } @misc{Matherly2024, title = {Deep Dive: http.favicon}, author = {Matherly, John}, year = 2024, month = {jan}, url = {https://blog.shodan.io/deep-dive-http-favicon/}, institution = {Shodan} } @incollection{Peters2002, author = {Peters, Tim}, title = {Algorithms: Introduction}, chapter = {17}, year = 2002, month = {jul}, booktitle = {Python Cookbook}, publisher = {O'Reilly Media}, editor = {Martelli, Alex and Ascher, David}, edition = {1st edition} } @techreport{RodriguezGuerra2021, title = {Is GitHub Actions suitable for running benchmarks?}, author = {Rodríguez-Guerra, Jaime}, year = 2021, month = {aug}, url = {https://labs.quansight.org/blog/2021/08/github-actions-benchmarks}, institution = {Quansight Labs} } @inproceedings{Senuma2011, title = {K-means Clustering with Feature Hashing}, author = {Senuma, Hajime}, year = 2011, booktitle = {Proceedings of the 49th Annual Meeting of the {A}ssociation for {C}omputational {L}inguistics: Student Session}, pages = {122--126}, note = {Issue: June} } @inproceedings{Senuma2016, title = {Learning Succinct Models: Pipelined Compression with {L}1-Regularization, Hashing, {E}lias–{F}ano Indices, and Quantization}, author = {Senuma, Hajime and Aizawa, Akiko}, year = 2016, booktitle = {Proceedings of {COLING} 2016, the 26th International Conference on Computational Linguistics: Technical Papers}, pages = {2774--2784} } @article{Shi2009, title = {Hash Kernels for Structured Data}, author = {Shi, Qinfeng and Petterson, James and Dror, Gideon and Langford, John and Smola, Alex and Vishwanathan, S.V.N.}, year = 2009, journal = {Journal of Machine Learning Research}, volume = 10, pages = {2615--2637} } @misc{shodan_its_2021, title = {it's the {MMH3} hash of the http.html property. 
See: {PyPI} mmh3}, author = {Shodan}, year = 2021, month = may, journal = {Twitter}, url = {https://twitter.com/shodanhq/status/1395501365456261122} } @misc{Stinner2016, title = {My journey to stable benchmark}, author = {Stinner, Victor}, year = 2016, url = {https://vstinner.github.io/journey-to-stable-benchmark-system.html} } @inproceedings{Tang2024, title = {Data Splitting based Double Layer Encryption for Secure Ciphertext Deduplication in Cloud Storage}, author = {Tang, Xin and Jin, Luchao}, year = 2024, journal = {2024 IEEE 17th International Conference on Cloud Computing (CLOUD)}, pages = {153--163}, doi = {10.1109/CLOUD62652.2024.00027} } @inproceedings{Weinberger2009, title = {Feature Hashing for Large Scale Multitask Learning}, author = {Weinberger, Kilian and Dasgupta, Anirban and Langford, John and Smola, Alex and Attenberg, Josh}, year = 2009, booktitle = {Proceedings of the 26th International Conference on Machine Learning}, doi = {10.1145/1553374.1553516}, note = {arXiv: 0902.2206v5} } ================================================ FILE: paper/paper.md ================================================ --- title: "mmh3: A Python extension for MurmurHash3" tags: - Python - hash - high-performance computing - artificial intelligence - natural language processing - internet of things - cybersecurity authors: - name: Hajime Senuma orcid: 0000-0001-8542-1768 affiliation: 1 affiliations: - name: National Institute of Informatics, Japan index: 1 date: 15 Dec 2024 bibliography: paper.bib --- # Summary In recent years, artificial intelligence (AI) has rapidly evolved, particularly in natural language processing (NLP) with services like OpenAI's ChatGPT. Likewise, the Internet of Things (IoT) continues to grow as a key area of ubiquitous computing, exemplified by Shodan, the first IoT search engine. 
Underlying these advancements are high-performance algorithms and data structures relying on non-cryptographic hash functions, which are characteristically fast, produce statistically well-distributed bits, exhibit an avalanche effect (where a one-bit change in the input alters at least half of the output), and are collision resistant. Because cryptographic strength is unnecessary in these cases, they benefit from the efficiency of non-cryptographic hashes. MurmurHash3, together with its test suite SMHasher, was developed by @appleby_murmurhash3_2011 and is one of the earliest and most continuously popular hash functions specifically designed to implement the characteristics mentioned above. `mmh3` was launched in 2011 as a Python extension for MurmurHash3 and has been maintained ever since. Its API is simple to use for Python programmers, as it offers both one-shot hash functions and hasher classes that allow incremental updating, whose methods are compliant with `hashlib`, a part of the Python Standard Library. The library provides Python wheels (i.e., pre-built binary packages) for immediate use on various platforms, including Linux (x86_64, aarch64, i686, ppc64le, and s390x), Windows (win32, win_amd64, and win_arm64), and macOS (Intel Mac and Apple Silicon). From version 4.0.0, `mmh3` has been published under the MIT License, an OSI-approved permissive open-source license. As of September 1, 2024, `mmh3` was being downloaded more than 4 million times per month, and it ranks as the 973rd most downloaded PyPI package (of around 566,000 projects), showing that only 0.17% of the remaining packages in the PyPI ecosystem are more popular [@hugo_van_kemenade_2024_13624792]. According to PePy, as of September 1, 2024, the total downloads of this library exceeded 130 million.
Libraries and organizations that use `mmh3` include Shodan, Microsoft Azure SDK for Python, Apache Iceberg (open table format for analytic datasets), Feast (feature store for machine learning), PyMilvus (Python SDK for Milvus, an open-source vector database), and pocsuite3 (open-source remote vulnerability testing framework). # Statement of need ## AI and High-Performance Computing AI is one of the most resource-demanding fields in computer science and engineering. To mitigate this problem, various techniques are employed under main systems, in which non-cryptographic hash functions play key roles in a number of algorithms and data structures. A notable technique is _feature hashing_ [@Weinberger2009; @Shi2009]. In its simplest usage, when given a string-indexed data vector, it converts the vector into an integer-indexed data vector in which each index is the hash result of the original string index; collision values are summed. Despite its simple and intuitive usage, a machine-learning process with feature hashing is statistically guaranteed to be nearly as accurate as its original process. Feature hashing has been shown to be useful for various situations, including K-means clustering [@Senuma2011] and succinct model learning [@Senuma2016]. Other popular techniques that leverage non-cryptographic hash functions include _Bloom Filter_ [@Bloom1970], a compact data structure that tests whether an element is a member of a certain set (with false positive matches), and _MinHash_ [@Broder1997a], an algorithm that quickly estimates the similarity of two sets. `mmh3` appears in scholarly papers on various topics, including Indian language NLP suites [@kakwani_indicnlpsuite_2020], a secure system based on probabilistic structures [@adja_blockchain-based_2021], as well as secure ciphertext deduplication in cloud storage [@Tang2024]. 
It has also appeared in technical books and computer science texts [@gorelick_high_2020; @kumar_probabilistic_2021; @medjedovic_algorithms_2022]. ## Internet of Things `mmh3` is applicable to the IoT field. According to @shodan_its_2021, Shodan [@Matherly2017] uses `mmh3` as its fingerprint for a favicon (i.e., an icon associated with a web page or website). @Matherly2024 explained the adoption of `mmh3` due to its speed and compact hash size, noting that cryptographic guarantees provided by `md5` and other hashes were not necessary for their use case. ZoomEye, another popular IoT search engine, follows Shodan’s convention. For cybersecurity, @kopriva_hunting_2021 reported a method of discovering possible phishing websites by searching websites with Shodan, whose favicon’s `mmh3` hash value was the same as that of a genuine one. Another use case of `mmh3` in this area includes open-source intelligence (OSINT) activities, such as measuring the popularity of web frameworks and servers, as some users do not change their default favicon settings specified by applications [@faraday_security_understanding_2022]. # Related software `PYMMH` [@kihlander_pymmh3_2013] is a pure Python implementation of the MurmurHash3 algorithms. Among various other Python bindings for non-cryptographic hashes, `python-xxhash` by Yue Du [@du_xxhash_2014] is another popular hash library, featuring xxHash developed by Yan Collet [@collet_xxhash_2014]. # Benchmarks We conducted microbenchmarking experiments to compare the efficiency of Python-C hash libraries, balancing accuracy, reproducibility, and reliability. Our methodology follows practices from microbenchmarking literature, including works by @Peters2002, @Stinner2016, @collet_xxhash_comparison_2020, @gorelick_high_2020, @RodriguezGuerra2021, and @Bernhardt2023. \autoref{bandwidth} and \autoref{latency} summarize the benchmarking results. 
While the `xxh3` family in `python-xxhash 3.5.0` shows superior performance for large inputs, the `mmh3 5.0.0` implementation excels with smaller inputs (common scenarios for non-cryptographic hashes), due to its use of `METH_FASTCALL`, an overhead-reducing interface introduced in Python 3.7. For details, see the documentation of the project: . Additionally, the benchmarking results are publicly available as JSON files in the repository: . : \label{bandwidth}Benchmarking results for Python extensions. Small data velocity is defined as the inverse of the mean latency (in microseconds) for inputs in the range of 1–256 bytes. Collet (2020) refers to the results of original C implementations experimented by the author of xxHash, using a CPU clocked at 3.6–4.9 GHz (ours: 2.4–3.3 GHz). | Hash | Width | Bandwidth | Small Data Velocity | cf. Collet (2020) | | :----------- | -------: | :-------------- | ------------------: | :---------------- | | xxh3_128 | 128 bits | **22.42 GiB/s** | 8.96 | 29.6 GiB/s | | xxh3_64 | 64 bits | 22.41 GiB/s | 9.5 | 31.5 GiB/s | | xxh_64 | 64 bits | 8.90 GiB/s | 9.3 | 9.1 GiB/s | | **mmh3_128** | 128 bits | 6.91 GiB/s | **19.04** | N/A | | xxh_32 | 32 bits | 6.15 GiB/s | 8.91 | 9.7 GiB/s | | **mmh3_32** | 32 bits | 2.86 GiB/s | 18.41 | 3.9 GiB/s | | sha1 | 16 bits | 1.63 GiB/s | 2.4 | 0.8 GiB/s | | md5 | 128 bits | 0.65 GiB/s | 1.95 | 0.6 GiB/s | ![\label{latency}Latency for small to medium-sized inputs. Lower is better.](../docs/_static/latency_small.png) # Acknowledgements The author extends sincere gratitude to Akiko Aizawa for her helpful comments on this paper. Appreciation is also given to all those involved in the development and maintenance of `mmh3`. Special thanks go to Micha Gorelick, who made the first pull request to the project and later introduced the library in her technical book [@gorelick_high_2020]. 
# References ================================================ FILE: pyproject.toml ================================================ [build-system] # setuptools >= 74.1.0 required to build C extensions via pyproject.toml requires = ["setuptools >= 74.1.0", "wheel"] build-backend = "setuptools.build_meta" [project] name = "mmh3" version = "5.2.1" description = "Python extension for MurmurHash (MurmurHash3), a set of fast and robust hash functions." readme = "README.md" license = {file = "LICENSE"} keywords = ["utility", "hash", "MurmurHash"] requires-python = ">=3.10" authors = [ {name = "Hajime Senuma", email="hajime.senuma@gmail.com"} ] classifiers = [ "Development Status :: 5 - Production/Stable", "Intended Audience :: Developers", "License :: OSI Approved :: MIT License", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", "Programming Language :: Python :: 3.14", "Programming Language :: Python :: Free Threading :: 2 - Beta", "Topic :: Software Development :: Libraries", "Topic :: Utilities" ] [project.optional-dependencies] test = [ "pytest == 9.0.2", "pytest-sugar == 1.1.1" ] lint = [ "actionlint-py == 1.7.11.24", "clang-format == 22.1.0", "codespell == 2.4.1", "pylint == 4.0.5", "ruff == 0.15.4" ] type = [ "mypy == 1.19.1" ] docs = [ "myst-parser == 5.0.0", "shibuya == 2026.1.9", "sphinx == 8.2.3", "sphinx-copybutton == 0.5.2" ] benchmark = [ "pymmh3 == 0.0.5", "pyperf == 2.10.0", "xxhash == 3.6.0" ] plot = [ "matplotlib == 3.10.8", "pandas == 3.0.1" ] [project.urls] Homepage = "https://pypi.org/project/mmh3/" Documentation = "https://mmh3.readthedocs.io/" Repository = "https://github.com/hajimes/mmh3" Changelog = "https://github.com/hajimes/mmh3/blob/master/CHANGELOG.md" "Bug Tracker" = "https://github.com/hajimes/mmh3/issues" [tool.codespell] # As of 2026-03-02, skip has an issue on super-linter # 
https://github.com/super-linter/super-linter/issues/7466 skip = "*/paper.bib,./build" # Collet is a surname, Commun is an abbr for a journal name, # fo is used in several test strings, and Ines is also a surname. ignore-words-list = "Collet,Commun,fo,Ines" [tool.ruff] src = ["src/mmh3/__init__.pyi", "util", "tests", "benchmark", "docs"] [tool.ruff.lint] select = ["E", "W", "F", "I", "UP", "B", "SIM", "C4", "ISC", "NPY"] [tool.ruff.lint.isort] known-first-party = ["mmh3"] [tool.setuptools] include-package-data = true ext-modules = [ {name = "mmh3", sources = ["./src/mmh3/mmh3module.c", "./src/mmh3/murmurhash3.c"]} ] [tool.setuptools.package-data] mmh3 = ["*.h"] [tool.pylint] ignore-paths = [ "^build", "^venv", "^.venv", "^.tox", "^src/mmh3/__init__.pyi" ] # Use multiple processes to speed up Pylint. # The value 0 specifies the number of processors to be auto-detected. # This setting can be found in the template file of super-linter 7.0.0. jobs = 0 # import-error: An error tricky to resolve, especially on super-linter. # wrong-import-order: Respect Ruff's import order. disable = [ "import-error", "wrong-import-order" ] ================================================ FILE: src/mmh3/__init__.pyi ================================================ import sys from typing import Any, final if sys.version_info >= (3, 12): from collections.abc import Buffer else: from _typeshed import ReadableBuffer as Buffer def hash(key: bytes | str, seed: int = 0, signed: Any = True) -> int: ... def hash_from_buffer(key: Buffer | str, seed: int = 0, signed: Any = True) -> int: ... def hash64( key: bytes | str, seed: int = 0, x64arch: Any = True, signed: Any = True ) -> tuple[int, int]: ... def hash128( key: bytes | str, seed: int = 0, x64arch: Any = True, signed: Any = False ) -> int: ... def hash_bytes(key: bytes | str, seed: int = 0, x64arch: Any = True) -> bytes: ... def mmh3_32_digest(key: Buffer | str, seed: int = 0) -> bytes: ... 
# One-shot digest functions, named after the underlying MurmurHash3
# variant (x86_32 / x64_128 / x86_128) and the result representation:
# "digest" -> bytes, "sintdigest" -> signed int, "uintdigest" -> unsigned
# int, and "stupledigest"/"utupledigest" -> a signed/unsigned pair of
# 64-bit ints. Each accepts a read-only buffer (or str) and an int seed.
def mmh3_32_sintdigest(key: Buffer | str, seed: int = 0) -> int: ...
def mmh3_32_uintdigest(key: Buffer | str, seed: int = 0) -> int: ...
def mmh3_x64_128_digest(key: Buffer | str, seed: int = 0) -> bytes: ...
def mmh3_x64_128_sintdigest(key: Buffer | str, seed: int = 0) -> int: ...
def mmh3_x64_128_uintdigest(key: Buffer | str, seed: int = 0) -> int: ...
def mmh3_x64_128_stupledigest(key: Buffer | str, seed: int = 0) -> tuple[int, int]: ...
def mmh3_x64_128_utupledigest(key: Buffer | str, seed: int = 0) -> tuple[int, int]: ...
def mmh3_x86_128_digest(key: Buffer | str, seed: int = 0) -> bytes: ...
def mmh3_x86_128_sintdigest(key: Buffer | str, seed: int = 0) -> int: ...
def mmh3_x86_128_uintdigest(key: Buffer | str, seed: int = 0) -> int: ...
def mmh3_x86_128_stupledigest(key: Buffer | str, seed: int = 0) -> tuple[int, int]: ...
def mmh3_x86_128_utupledigest(key: Buffer | str, seed: int = 0) -> tuple[int, int]: ...

# Base class for the incremental hasher objects below. update() feeds
# additional data; digest()/sintdigest()/uintdigest() expose the hash in
# different representations; copy() returns a new Hasher (per the hashlib
# convention, presumably a snapshot of the current state -- implemented
# in C, see mmh3module.c).
class Hasher:
    def __init__(self, data: Buffer | None = None, seed: int = 0) -> None: ...
    def update(self, data: Buffer) -> None: ...
    def digest(self) -> bytes: ...
    def sintdigest(self) -> int: ...
    def uintdigest(self) -> int: ...
    def copy(self) -> Hasher: ...
    # hashlib-style read-only introspection attributes.
    @property
    def digest_size(self) -> int: ...
    @property
    def block_size(self) -> int: ...
    @property
    def name(self) -> str: ...

# Concrete hashers. The 128-bit variants additionally expose the digest
# as a tuple of two 64-bit integers (signed and unsigned forms).
@final
class mmh3_32(Hasher): ...

@final
class mmh3_x64_128(Hasher):
    def stupledigest(self) -> tuple[int, int]: ...
    def utupledigest(self) -> tuple[int, int]: ...

@final
class mmh3_x86_128(Hasher):
    def stupledigest(self) -> tuple[int, int]: ...
    def utupledigest(self) -> tuple[int, int]: ...
================================================ FILE: src/mmh3/hashlib.h ================================================ // This code was taken from a part of CPython's code base (Modules/hashlib.h) // at commit 9ce0f48e918860ffa32751a85b0fe7967723e2e3 // Below is a copy of the license of CPython // PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2 // -------------------------------------------- // // 1. This LICENSE AGREEMENT is between the Python Software Foundation // ("PSF"), and the Individual or Organization ("Licensee") accessing and // otherwise using this software ("Python") in source or binary form and // its associated documentation. // // 2. Subject to the terms and conditions of this License Agreement, PSF hereby // grants Licensee a nonexclusive, royalty-free, world-wide license to // reproduce, analyze, test, perform and/or display publicly, prepare // derivative works, distribute, and otherwise use Python alone or in any // derivative version, provided, however, that PSF's License Agreement and // PSF's notice of copyright, i.e., "Copyright (c) 2001, 2002, 2003, 2004, // 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, // 2017, 2018, 2019, 2020, 2021, 2022, 2023 Python Software Foundation; All // Rights Reserved" are retained in Python alone or in any derivative version // prepared by Licensee. // // 3. In the event Licensee prepares a derivative work that is based on // or incorporates Python or any part thereof, and wants to make // the derivative work available to others as provided herein, then // Licensee hereby agrees to include in any such work a brief summary of // the changes made to Python. // // 4. PSF is making Python available to Licensee on an "AS IS" // basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR // IMPLIED. 
BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND // DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS // FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT // INFRINGE ANY THIRD PARTY RIGHTS. // // 5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON // FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS // A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON, // OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. // // 6. This License Agreement will automatically terminate upon a material // breach of its terms and conditions. // // 7. Nothing in this License Agreement shall be deemed to create any // relationship of agency, partnership, or joint venture between PSF and // Licensee. This License Agreement does not grant permission to use PSF // trademarks or trade name in a trademark sense to endorse or promote // products or services of Licensee, or any third party. // // 8. By copying, installing or otherwise using Python, Licensee // agrees to be bound by the terms and conditions of this License // Agreement. /* * Given a PyObject* obj, fill in the Py_buffer* viewp with the result * of PyObject_GetBuffer. Sets an exception and issues the erraction * on any errors, e.g. 'return NULL' or 'goto error'. 
*/ #define GET_BUFFER_VIEW_OR_ERROR(obj, viewp, erraction) \ do { \ if (PyUnicode_Check((obj))) { \ PyErr_SetString(PyExc_TypeError, \ "Strings must be encoded before hashing"); \ erraction; \ } \ if (!PyObject_CheckBuffer((obj))) { \ PyErr_SetString(PyExc_TypeError, \ "object supporting the buffer API required"); \ erraction; \ } \ if (PyObject_GetBuffer((obj), (viewp), PyBUF_SIMPLE) == -1) { \ erraction; \ } \ if ((viewp)->ndim > 1) { \ PyErr_SetString(PyExc_BufferError, \ "Buffer must be single dimension"); \ PyBuffer_Release((viewp)); \ erraction; \ } \ } while (0) #define GET_BUFFER_VIEW_OR_ERROUT(obj, viewp) \ GET_BUFFER_VIEW_OR_ERROR(obj, viewp, return NULL) ================================================ FILE: src/mmh3/mmh3module.c ================================================ // To handle 64-bit data; see https://docs.python.org/3/c-api/arg.html #ifndef PY_SSIZE_T_CLEAN #define PY_SSIZE_T_CLEAN #endif #include #include #include #if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) #include #endif #include "hashlib.h" #include "murmurhash3.h" #if defined(_MSC_VER) typedef signed __int8 int8_t; typedef signed __int32 int32_t; typedef signed __int64 int64_t; typedef unsigned __int8 uint8_t; typedef unsigned __int32 uint32_t; typedef unsigned __int64 uint64_t; // Other compilers #else // defined(_MSC_VER) #include #endif // defined(_MSC_VER) #define MMH3_32_DIGESTSIZE 4 #define MMH3_128_DIGESTSIZE 16 #define MMH3_32_BLOCKSIZE 12 #define MMH3_128_BLOCKSIZE 32 #define MMH3_VALIDATE_SEED_RETURN_NULL(seed) \ if (seed < 0 || seed > 0xFFFFFFFF) { \ PyErr_SetString(PyExc_ValueError, "seed is out of range"); \ return NULL; \ } #define MMH3_VALIDATE_SEED_RETURN_INT(seed, buf) \ if (seed < 0 || seed > 0xFFFFFFFF) { \ PyBuffer_Release(&buf); \ PyErr_SetString(PyExc_ValueError, "seed is out of range"); \ return -1; \ } // obj: PyObject* // target_str: const char * // len: Py_ssize_t #define MMH3_HASH_VALIDATE_AND_SET_BYTES(obj, target_str, len) \ if 
(PyBytes_Check(obj)) { \ target_str_len = PyBytes_Size(obj); \ target_str = PyBytes_AS_STRING(obj); \ } \ else if (PyUnicode_Check(obj)) { \ target_str_len = PyUnicode_GET_LENGTH(obj); \ target_str = PyUnicode_AsUTF8AndSize(obj, &target_str_len); \ } \ else { \ PyErr_Format(PyExc_TypeError, \ "argument 1 must be read-only bytes-like object, " \ "not '%s'", \ Py_TYPE(obj)->tp_name); \ return NULL; \ } // obj: PyObject* // seed: unsigned long #define MMH3_HASH_VALIDATE_AND_SET_SEED(obj, seed) \ if (!PyLong_Check(obj)) { \ PyErr_Format(PyExc_TypeError, \ "'%s' object cannot be interpreted as an integer", \ Py_TYPE(obj)->tp_name); \ return NULL; \ } \ seed = PyLong_AsUnsignedLong(obj); \ if (seed == (unsigned long)-1 && PyErr_Occurred()) { \ if (PyErr_ExceptionMatches(PyExc_OverflowError)) { \ PyErr_SetString(PyExc_ValueError, "seed is out of range"); \ return NULL; \ } \ } \ if (seed > 0xFFFFFFFF) { \ PyErr_SetString(PyExc_ValueError, "seed is out of range"); \ return NULL; \ } // nargs: Py_ssize_t // name: const char * // pos: int #define MMH3_HASH_VALIDATE_ARG_DUPLICATION(nargs, name, pos) \ if (nargs >= pos) { \ PyErr_Format(PyExc_TypeError, \ "argument for function given by name " \ "('%s') and position (%d)", \ name, pos); \ return NULL; \ } #define MMH3_VALIDATE_ARGS_AND_SET_SEED(nargs, args, seed) \ if (nargs < 1) { \ PyErr_SetString(PyExc_TypeError, \ "function takes at least 1 argument (0 given)"); \ return NULL; \ } \ if (nargs > 2) { \ PyErr_Format(PyExc_TypeError, \ "function takes at most 2 arguments (%d given)", \ (int)nargs); \ return NULL; \ } \ if (nargs == 2) { \ if (!PyLong_Check(args[1])) { \ PyErr_Format(PyExc_TypeError, \ "'%s' object cannot be interpreted as an integer", \ Py_TYPE(args[1])->tp_name); \ return NULL; \ } \ const unsigned long seed_tmp = PyLong_AsUnsignedLong(args[1]); \ if (seed_tmp == (unsigned long)-1 && PyErr_Occurred()) { \ if (PyErr_ExceptionMatches(PyExc_OverflowError)) { \ PyErr_SetString(PyExc_ValueError, "seed is out of 
range"); \ return NULL; \ } \ } \ if (seed_tmp > 0xFFFFFFFF) { \ PyErr_SetString(PyExc_ValueError, "seed is out of range"); \ return NULL; \ } \ seed = (uint32_t)seed_tmp; \ } //----------------------------------------------------------------------------- // Helpers for mutex manipulations for hashers #ifdef Py_GIL_DISABLED #define MMH3_HASHER_LOCK(obj) PyMutex_Lock(&(obj->mutex)) #define MMH3_HASHER_UNLOCK(obj) PyMutex_Unlock(&(obj->mutex)) #define MMH3_HASHER_INIT_MUTEX(obj) \ PyMutex t = {0}; \ obj->mutex = t; #else #define MMH3_HASHER_LOCK(obj) (void)0 #define MMH3_HASHER_UNLOCK(obj) (void)0 #define MMH3_HASHER_INIT_MUTEX(obj) (void)0 #endif //----------------------------------------------------------------------------- // One shot functions PyDoc_STRVAR( mmh3_hash_doc, "hash(key, seed=0, signed=True) -> int\n" "\n" "Return a hash as a 32-bit integer.\n" "\n" "Calculated by the MurmurHash3_x86_32 algorithm.\n" "\n" "Args:\n" " key (bytes | str): The input data to hash.\n" " seed (int): The seed value. Must be an integer in the range\n" " [0, 0xFFFFFFFF].\n" " signed (Any): If True, return a signed integer. Otherwise, return\n" " an unsigned integer.\n" "\n" "Returns:\n" " int: The hash value as a 32-bit integer.\n" "\n" ".. versionchanged:: 5.0.0\n" " The ``seed`` argument is now strictly checked for valid range.\n" " The type of the ``signed`` argument has been changed from\n" " ``bool`` to ``Any``. 
Performance improvements have been made.\n"); static PyObject * mmh3_hash(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { const char *target_str; Py_ssize_t target_str_len; unsigned long seed = 0; int32_t result[1]; long long_result = 0; int is_signed = 1; #ifndef _MSC_VER #if __LONG_WIDTH__ == 64 || defined(__APPLE__) static uint64_t mask[] = {0x0ffffffff, 0xffffffffffffffff}; #endif #endif if ((nargs < 1) && kwnames == NULL) { PyErr_SetString(PyExc_TypeError, "function missing required argument 'key' (pos 1)"); return NULL; } if (nargs > 3) { PyErr_Format(PyExc_TypeError, "function takes at most 3 arguments (%d given)", (int)nargs); return NULL; } if (nargs >= 1) { MMH3_HASH_VALIDATE_AND_SET_BYTES(args[0], target_str, target_str_len); } if (nargs >= 2) { MMH3_HASH_VALIDATE_AND_SET_SEED(args[1], seed); } if (nargs >= 3) { is_signed = PyObject_IsTrue(args[2]); } if (kwnames) { for (Py_ssize_t i = 0; i < PyTuple_Size(kwnames); i++) { const char *kwname = PyUnicode_AsUTF8(PyTuple_GetItem(kwnames, i)); if (strcmp(kwname, "key") == 0) { MMH3_HASH_VALIDATE_ARG_DUPLICATION(nargs, "key", 1); MMH3_HASH_VALIDATE_AND_SET_BYTES(args[nargs + i], target_str, target_str_len); } else if (strcmp(kwname, "seed") == 0) { MMH3_HASH_VALIDATE_ARG_DUPLICATION(nargs, "seed", 2); MMH3_HASH_VALIDATE_AND_SET_SEED(args[nargs + i], seed); } else if (strcmp(kwname, "signed") == 0) { MMH3_HASH_VALIDATE_ARG_DUPLICATION(nargs, "signed", 3); is_signed = PyObject_IsTrue(args[nargs + i]); } else { PyErr_Format( PyExc_TypeError, "'%s' is an invalid keyword argument for this function", kwname); return NULL; } } } murmurhash3_x86_32(target_str, target_str_len, (uint32_t)seed, result); #if defined(_MSC_VER) /* for Windows envs */ long_result = result[0]; if (is_signed == 1) { return PyLong_FromLong(long_result); } else { return PyLong_FromUnsignedLong(long_result); } #else // defined(_MSC_VER) /* for standard envs */ #if __LONG_WIDTH__ == 64 || defined(__APPLE__) 
long_result = result[0] & mask[is_signed]; return PyLong_FromLong(long_result); #else // __LONG_WIDTH__ == 64 || defined(__APPLE__) long_result = result[0]; if (is_signed == 1) { return PyLong_FromLong(long_result); } else { return PyLong_FromUnsignedLong(long_result); } #endif // __LONG_WIDTH__ == 64 || defined(__APPLE__) #endif // defined(_MSC_VER) } PyDoc_STRVAR( mmh3_hash_from_buffer_doc, "hash_from_buffer(key, seed=0, signed=True) -> int\n" "\n" "Return a hash for the buffer as a 32-bit integer.\n" "\n" "Calculated by the MurmurHash3_x86_32 algorithm. Designed for large " "memory-views such as numpy arrays.\n" "\n" "Args:\n" " key (Buffer | str): The buffer to hash. String inputs are also\n" " supported and are automatically converted to `bytes` using\n" " UTF-8 encoding before hashing.\n" " seed (int): The seed value. Must be an integer in the range\n" " [0, 0xFFFFFFFF].\n" " signed (Any): If True, return a signed integer. Otherwise, return\n" " an unsigned integer.\n" "\n" "Returns:\n" " int: The hash value as a 32-bit integer.\n" "\n" ".. deprecated:: 5.0.0\n" " Use ``mmh3_32_sintdigest()`` or ``mmh3_32_uintdigest()`` instead.\n" "\n" ".. 
versionchanged:: 5.0.0\n" " The ``seed`` argument is now strictly checked for valid range.\n" " The type of the ``signed`` argument has been changed from\n" " ``bool`` to ``Any``.\n"); static PyObject * mmh3_hash_from_buffer(PyObject *self, PyObject *args, PyObject *keywds) { Py_buffer target_buf; long long seed = 0; int32_t result[1]; long long_result = 0; int is_signed = 1; static char *kwlist[] = {"key", "seed", "signed", NULL}; #ifndef _MSC_VER #if __LONG_WIDTH__ == 64 || defined(__APPLE__) static uint64_t mask[] = {0x0ffffffff, 0xffffffffffffffff}; #endif #endif if (!PyArg_ParseTupleAndKeywords(args, keywds, "s*|Lp", kwlist, &target_buf, &seed, &is_signed)) { return NULL; } MMH3_VALIDATE_SEED_RETURN_NULL(seed); murmurhash3_x86_32(target_buf.buf, target_buf.len, (uint32_t)seed, result); PyBuffer_Release(&target_buf); #if defined(_MSC_VER) /* for Windows envs */ long_result = result[0]; if (is_signed == 1) { return PyLong_FromLong(long_result); } else { return PyLong_FromUnsignedLong(long_result); } #else // defined(_MSC_VER) /* for standard envs */ #if __LONG_WIDTH__ == 64 || defined(__APPLE__) long_result = result[0] & mask[is_signed]; return PyLong_FromLong(long_result); #else // __LONG_WIDTH__ == 64 || defined(__APPLE__) long_result = result[0]; if (is_signed == 1) { return PyLong_FromLong(long_result); } else { return PyLong_FromUnsignedLong(long_result); } #endif // __LONG_WIDTH__ == 64 || defined(__APPLE__) #endif // defined(_MSC_VER) } PyDoc_STRVAR( mmh3_hash64_doc, "hash64(key, seed=0, x64arch=True, signed=True) -> tuple[int, int]\n" "\n" "Return a hash as a tuple of two 64-bit integers.\n" "\n" "Calculated by the MurmurHash3_x{64, 86}_128 algorithm.\n" "\n" "Args:\n" " key (bytes | str): The input data to hash.\n" " seed (int): The seed value. Must be an integer in the range\n" " [0, 0xFFFFFFFF].\n" " x64arch (Any): If True, use an algorithm optimized for 64-bit\n" " architecture. 
Otherwise, use one optimized for 32-bit\n" " architecture.\n" " signed (Any): If True, return a signed integer. Otherwise, return\n" " an unsigned integer.\n" "\n" "Returns:\n" " tuple[int, int]: The hash value as a tuple of two 64-bit " "integers.\n" "\n" ".. versionchanged:: 5.1.0\n" " Performance improvements.\n" "\n" ".. versionchanged:: 5.0.0\n" " The ``seed`` argument is now strictly checked for valid range.\n" " The type of the ``x64arch`` and ``signed`` arguments has been\n" " changed from ``bool`` to ``Any``.\n"); static PyObject * mmh3_hash64(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { const char *target_str; Py_ssize_t target_str_len; long long seed = 0; uint64_t result[2]; int x64arch = 1; int is_signed = 1; static char *valflag[] = {"KK", "LL"}; if ((nargs < 1) && kwnames == NULL) { PyErr_SetString(PyExc_TypeError, "function missing required argument 'key' (pos 1)"); return NULL; } if (nargs > 4) { PyErr_Format(PyExc_TypeError, "function takes at most 4 arguments (%d given)", (int)nargs); return NULL; } if (nargs >= 1) { MMH3_HASH_VALIDATE_AND_SET_BYTES(args[0], target_str, target_str_len); } if (nargs >= 2) { MMH3_HASH_VALIDATE_AND_SET_SEED(args[1], seed); } if (nargs >= 3) { x64arch = PyObject_IsTrue(args[2]); } if (nargs >= 4) { is_signed = PyObject_IsTrue(args[2]); } if (kwnames) { for (Py_ssize_t i = 0; i < PyTuple_Size(kwnames); i++) { const char *kwname = PyUnicode_AsUTF8(PyTuple_GetItem(kwnames, i)); if (strcmp(kwname, "key") == 0) { MMH3_HASH_VALIDATE_ARG_DUPLICATION(nargs, "key", 1); MMH3_HASH_VALIDATE_AND_SET_BYTES(args[nargs + i], target_str, target_str_len); } else if (strcmp(kwname, "seed") == 0) { MMH3_HASH_VALIDATE_ARG_DUPLICATION(nargs, "seed", 2); MMH3_HASH_VALIDATE_AND_SET_SEED(args[nargs + i], seed); } else if (strcmp(kwname, "x64arch") == 0) { MMH3_HASH_VALIDATE_ARG_DUPLICATION(nargs, "x64arch", 3); x64arch = PyObject_IsTrue(args[nargs + i]); } else if (strcmp(kwname, "signed") == 0) { 
MMH3_HASH_VALIDATE_ARG_DUPLICATION(nargs, "signed", 4); is_signed = PyObject_IsTrue(args[nargs + i]); } else { PyErr_Format( PyExc_TypeError, "'%s' is an invalid keyword argument for this function", kwname); return NULL; } } } if (x64arch == 1) { murmurhash3_x64_128(target_str, target_str_len, (uint32_t)seed, result); } else { murmurhash3_x86_128(target_str, target_str_len, (uint32_t)seed, result); } PyObject *retval = Py_BuildValue(valflag[is_signed], result[0], result[1]); return retval; } PyDoc_STRVAR( mmh3_hash128_doc, "hash128(key, seed=0, x64arch=True, signed=False) -> int\n" "\n" "Return a hash as a 128-bit integer.\n\n" "Calculated by the MurmurHash3_x{64, 86}_128 algorithm.\n" "\n" "Args:\n" " key (bytes | str): The input data to hash.\n" " seed (int): The seed value. Must be an integer in the range\n" " [0, 0xFFFFFFFF].\n" " x64arch (Any): If True, use an algorithm optimized for 64-bit\n" " architecture. Otherwise, use one optimized for 32-bit\n" " architecture.\n" " signed (Any): If True, return a signed integer. Otherwise, return\n" " an unsigned integer.\n" "\n" "Returns:\n" " int: The hash value as a 128-bit integer.\n" "\n" ".. versionchanged:: 5.1.0\n" " Performance improvements.\n" "\n" ".. 
versionchanged:: 5.0.0\n" " The ``seed`` argument is now strictly checked for valid range.\n" " The type of the ``x64arch`` and ``signed`` arguments has been\n" " changed from ``bool`` to ``Any``.\n"); static PyObject * mmh3_hash128(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { const char *target_str; Py_ssize_t target_str_len; long long seed = 0; uint64_t result[2]; int x64arch = 1; int is_signed = 0; if ((nargs < 1) && kwnames == NULL) { PyErr_SetString(PyExc_TypeError, "function missing required argument 'key' (pos 1)"); return NULL; } if (nargs > 4) { PyErr_Format(PyExc_TypeError, "function takes at most 4 arguments (%d given)", (int)nargs); return NULL; } if (nargs >= 1) { MMH3_HASH_VALIDATE_AND_SET_BYTES(args[0], target_str, target_str_len); } if (nargs >= 2) { MMH3_HASH_VALIDATE_AND_SET_SEED(args[1], seed); } if (nargs >= 3) { x64arch = PyObject_IsTrue(args[2]); } if (nargs >= 4) { is_signed = PyObject_IsTrue(args[2]); } if (kwnames) { for (Py_ssize_t i = 0; i < PyTuple_Size(kwnames); i++) { const char *kwname = PyUnicode_AsUTF8(PyTuple_GetItem(kwnames, i)); if (strcmp(kwname, "key") == 0) { MMH3_HASH_VALIDATE_ARG_DUPLICATION(nargs, "key", 1); MMH3_HASH_VALIDATE_AND_SET_BYTES(args[nargs + i], target_str, target_str_len); } else if (strcmp(kwname, "seed") == 0) { MMH3_HASH_VALIDATE_ARG_DUPLICATION(nargs, "seed", 2); MMH3_HASH_VALIDATE_AND_SET_SEED(args[nargs + i], seed); } else if (strcmp(kwname, "x64arch") == 0) { MMH3_HASH_VALIDATE_ARG_DUPLICATION(nargs, "x64arch", 3); x64arch = PyObject_IsTrue(args[nargs + i]); } else if (strcmp(kwname, "signed") == 0) { MMH3_HASH_VALIDATE_ARG_DUPLICATION(nargs, "signed", 4); is_signed = PyObject_IsTrue(args[nargs + i]); } else { PyErr_Format( PyExc_TypeError, "'%s' is an invalid keyword argument for this function", kwname); return NULL; } } } if (x64arch == 1) { murmurhash3_x64_128(target_str, target_str_len, seed, result); } else { murmurhash3_x86_128(target_str, target_str_len, seed, 
result); } #if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) result[0] = bswap_64(result[0]); result[1] = bswap_64(result[1]); #endif /** * _PyLong_FromByteArray is not a part of the official Python/C API * and may be removed in the future (although it is practically stable). * cf. * https://mail.python.org/pipermail/python-list/2006-August/372365.html */ PyObject *retval = _PyLong_FromByteArray( (unsigned char *)result, MMH3_128_DIGESTSIZE, 1, is_signed); return retval; } PyDoc_STRVAR( mmh3_hash_bytes_doc, "hash_bytes(key, seed=0, x64arch=True) -> bytes\n" "\n" "Return a 16-byte hash of the ``bytes`` type.\n" "\n" "Args:\n" " key (bytes | str): The input data to hash.\n" " seed (int): The seed value. Must be an integer in the range\n" " [0, 0xFFFFFFFF].\n" " x64arch (Any): If True, use an algorithm optimized for 64-bit\n" " architecture. Otherwise, use one optimized for 32-bit\n" " architecture.\n" "Returns:\n" " bytes: The hash value as the ``bytes`` type with a length of 16\n" " bytes (128 bits).\n") "\n" ".. versionchanged:: 5.1.0\n" " Performance improvements.\n" "\n" ".. 
versionchanged:: 5.0.0\n" " The ``seed`` argument is now strictly checked for valid range.\n" " The type of the ``x64arch`` argument has been changed from\n" " ``bool`` to ``Any``.\n"; static PyObject * mmh3_hash_bytes(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { const char *target_str; Py_ssize_t target_str_len; long long seed = 0; uint64_t result[2]; int x64arch = 1; if ((nargs < 1) && kwnames == NULL) { PyErr_SetString(PyExc_TypeError, "function missing required argument 'key' (pos 1)"); return NULL; } if (nargs > 3) { PyErr_Format(PyExc_TypeError, "function takes at most 3 arguments (%d given)", (int)nargs); return NULL; } if (nargs >= 1) { MMH3_HASH_VALIDATE_AND_SET_BYTES(args[0], target_str, target_str_len); } if (nargs >= 2) { MMH3_HASH_VALIDATE_AND_SET_SEED(args[1], seed); } if (nargs >= 3) { x64arch = PyObject_IsTrue(args[2]); } if (kwnames) { for (Py_ssize_t i = 0; i < PyTuple_Size(kwnames); i++) { const char *kwname = PyUnicode_AsUTF8(PyTuple_GetItem(kwnames, i)); if (strcmp(kwname, "key") == 0) { MMH3_HASH_VALIDATE_ARG_DUPLICATION(nargs, "key", 1); MMH3_HASH_VALIDATE_AND_SET_BYTES(args[nargs + i], target_str, target_str_len); } else if (strcmp(kwname, "seed") == 0) { MMH3_HASH_VALIDATE_ARG_DUPLICATION(nargs, "seed", 2); MMH3_HASH_VALIDATE_AND_SET_SEED(args[nargs + i], seed); } else if (strcmp(kwname, "x64arch") == 0) { MMH3_HASH_VALIDATE_ARG_DUPLICATION(nargs, "x64arch", 3); x64arch = PyObject_IsTrue(args[nargs + i]); } else { PyErr_Format( PyExc_TypeError, "'%s' is an invalid keyword argument for this function", kwname); return NULL; } } } if (x64arch == 1) { murmurhash3_x64_128(target_str, target_str_len, seed, result); } else { murmurhash3_x86_128(target_str, target_str_len, seed, result); } #if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) result[0] = bswap_64(result[0]); result[1] = bswap_64(result[1]); #endif return PyBytes_FromStringAndSize((char *)result, MMH3_128_DIGESTSIZE); } 
//----------------------------------------------------------------------------- // Functions that accept a buffer PyDoc_STRVAR( mmh3_mmh3_32_digest_doc, "mmh3_32_digest(key, seed=0, /) -> bytes\n" "\n" "Return a 4-byte hash of the ``bytes`` type for the buffer.\n" "\n" "Calculated by the MurmurHash3_x86_32 algorithm.\n" "\n" "Args:\n" " key (Buffer): The input buffer to hash.\n" " seed (int): The seed value. Must be an integer in the range\n" " [0, 0xFFFFFFFF].\n" "\n" "Returns:\n" " bytes: The hash value as the ``bytes`` type with a length of\n" " 4 bytes (32 bits).\n" "\n" ".. versionadded:: 5.0.0\n"); static PyObject * mmh3_mmh3_32_digest(PyObject *self, PyObject *const *args, Py_ssize_t nargs) { Py_buffer target_buf; uint32_t seed = 0; char result[MMH3_32_DIGESTSIZE]; MMH3_VALIDATE_ARGS_AND_SET_SEED(nargs, args, seed); GET_BUFFER_VIEW_OR_ERROUT(args[0], &target_buf); murmurhash3_x86_32(target_buf.buf, target_buf.len, seed, result); PyBuffer_Release(&target_buf); #if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) ((uint32_t *)result)[0] = bswap_32(((uint32_t *)result)[0]); #endif return PyBytes_FromStringAndSize((char *)result, MMH3_32_DIGESTSIZE); } PyDoc_STRVAR( mmh3_mmh3_32_sintdigest_doc, "mmh3_32_sintdigest(key, seed=0, /) -> int\n" "\n" "Return a hash for the buffer as a 32-bit signed integer.\n" "\n" "Calculated by the MurmurHash3_x86_32 algorithm.\n" "\n" "Args:\n" " key (Buffer): The input buffer to hash.\n" " seed (int): The seed value. Must be an integer in the range\n" " [0, 0xFFFFFFFF].\n" "\n" "Returns:\n" " int: The hash value as a 32-bit signed integer.\n" "\n" ".. 
versionadded:: 5.0.0\n"); static PyObject * mmh3_mmh3_32_sintdigest(PyObject *self, PyObject *const *args, Py_ssize_t nargs) { Py_buffer target_buf; uint32_t seed = 0; int32_t result[1]; MMH3_VALIDATE_ARGS_AND_SET_SEED(nargs, args, seed); GET_BUFFER_VIEW_OR_ERROUT(args[0], &target_buf); murmurhash3_x86_32(target_buf.buf, target_buf.len, seed, result); PyBuffer_Release(&target_buf); return PyLong_FromLong(result[0]); } PyDoc_STRVAR( mmh3_mmh3_32_uintdigest_doc, "mmh3_32_uintdigest(key, seed=0, /) -> int\n" "\n" "Return a hash for the buffer as a 32-bit unsigned integer.\n" "\n" "Calculated by the MurmurHash3_x86_32 algorithm.\n" "\n" "Args:\n" " key (Buffer): The input buffer to hash.\n" " seed (int): The seed value. Must be an integer in the range\n" " [0, 0xFFFFFFFF].\n" "\n" "Returns:\n" " int: The hash value as a 32-bit unsigned integer.\n" "\n" ".. versionadded:: 5.0.0\n"); static PyObject * mmh3_mmh3_32_uintdigest(PyObject *self, PyObject *const *args, Py_ssize_t nargs) { Py_buffer target_buf; uint32_t seed = 0; uint32_t result[1]; MMH3_VALIDATE_ARGS_AND_SET_SEED(nargs, args, seed); GET_BUFFER_VIEW_OR_ERROUT(args[0], &target_buf); murmurhash3_x86_32(target_buf.buf, target_buf.len, seed, result); PyBuffer_Release(&target_buf); return PyLong_FromUnsignedLong(result[0]); } PyDoc_STRVAR( mmh3_mmh3_x64_128_digest_doc, "mmh3_x64_128_digest(key, seed=0, /) -> bytes\n" "\n" "Return a 16-byte hash of the ``bytes`` type for the buffer.\n" "\n" "Calculated by the MurmurHash3_x64_128 algorithm.\n" "\n" "Args:\n" " key (Buffer): The input buffer to hash.\n" " seed (int): The seed value. Must be an integer in the range\n" " [0, 0xFFFFFFFF].\n" "\n" "Returns:\n" " bytes: The hash value as the ``bytes`` type with a length of\n" " 16 bytes (128 bits).\n" "\n" ".. 
versionadded:: 5.0.0\n"); static PyObject * mmh3_mmh3_x64_128_digest(PyObject *self, PyObject *const *args, Py_ssize_t nargs) { Py_buffer target_buf; uint32_t seed = 0; uint64_t result[2]; MMH3_VALIDATE_ARGS_AND_SET_SEED(nargs, args, seed); GET_BUFFER_VIEW_OR_ERROUT(args[0], &target_buf); murmurhash3_x64_128(target_buf.buf, target_buf.len, seed, result); PyBuffer_Release(&target_buf); #if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) result[0] = bswap_64(result[0]); result[1] = bswap_64(result[1]); #endif return PyBytes_FromStringAndSize((char *)result, MMH3_128_DIGESTSIZE); } PyDoc_STRVAR( mmh3_mmh3_x64_128_sintdigest_doc, "mmh3_x64_128_sintdigest(key, seed=0, /) -> int\n" "\n" "Return a hash for the buffer as a 128-bit signed integer.\n" "\n" "Calculated by the MurmurHash3_x64_128 algorithm.\n" "\n" "Args:\n" " key (Buffer): The input buffer to hash.\n" " seed (int): The seed value. Must be an integer in the range\n" " [0, 0xFFFFFFFF].\n" "\n" "Returns:\n" " int: The hash value as a 128-bit signed integer.\n" "\n" ".. versionadded:: 5.0.0\n"); static PyObject * mmh3_mmh3_x64_128_sintdigest(PyObject *self, PyObject *const *args, Py_ssize_t nargs) { Py_buffer target_buf; uint32_t seed = 0; uint64_t result[2]; MMH3_VALIDATE_ARGS_AND_SET_SEED(nargs, args, seed); GET_BUFFER_VIEW_OR_ERROUT(args[0], &target_buf); murmurhash3_x64_128(target_buf.buf, target_buf.len, seed, result); PyBuffer_Release(&target_buf); #if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) result[0] = bswap_64(result[0]); result[1] = bswap_64(result[1]); #endif /** * _PyLong_FromByteArray is not a part of the official Python/C API * and may be removed in the future (although it is practically stable). * cf. 
* https://mail.python.org/pipermail/python-list/2006-August/372365.html */ PyObject *retval = _PyLong_FromByteArray((unsigned char *)result, MMH3_128_DIGESTSIZE, 1, 1); return retval; } PyDoc_STRVAR( mmh3_mmh3_x64_128_uintdigest_doc, "mmh3_x64_128_uintdigest(key, seed=0, /) -> int\n" "\n" "Return a hash for the buffer as a 128-bit unsigned integer.\n" "\n" "Calculated by the MurmurHash3_x64_128 algorithm.\n" "\n" "Args:\n" " key (Buffer): The input buffer to hash.\n" " seed (int): The seed value. Must be an integer in the range\n" " [0, 0xFFFFFFFF].\n" "\n" "Returns:\n" " int: The hash value as a 128-bit unsigned integer.\n" "\n" ".. versionadded:: 5.0.0\n"); static PyObject * mmh3_mmh3_x64_128_uintdigest(PyObject *self, PyObject *const *args, Py_ssize_t nargs) { Py_buffer target_buf; uint32_t seed = 0; uint64_t result[2]; MMH3_VALIDATE_ARGS_AND_SET_SEED(nargs, args, seed); GET_BUFFER_VIEW_OR_ERROUT(args[0], &target_buf); murmurhash3_x64_128(target_buf.buf, target_buf.len, seed, result); PyBuffer_Release(&target_buf); #if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) result[0] = bswap_64(result[0]); result[1] = bswap_64(result[1]); #endif /** * _PyLong_FromByteArray is not a part of the official Python/C API * and may be removed in the future (although it is practically stable). * cf. * https://mail.python.org/pipermail/python-list/2006-August/372365.html */ PyObject *retval = _PyLong_FromByteArray((unsigned char *)result, MMH3_128_DIGESTSIZE, 1, 0); return retval; } PyDoc_STRVAR( mmh3_mmh3_x64_128_stupledigest_doc, "mmh3_x64_128_stupledigest(key, seed=0, /) -> tuple[int, int]\n" "\n" "Return a hash for the buffer as a tuple of two 64-bit signed integers.\n" "\n" "Calculated by the MurmurHash3_x64_128 algorithm.\n" "\n" "Args:\n" " key (Buffer): The input buffer to hash.\n" " seed (int): The seed value. 
Must be an integer in the range\n" " [0, 0xFFFFFFFF].\n" "\n" "Returns:\n" " tuple[int, int]: The hash value as a tuple of two 64-bit signed\n" " integers.\n" "\n" ".. versionadded:: 5.0.0\n"); static PyObject * mmh3_mmh3_x64_128_stupledigest(PyObject *self, PyObject *const *args, Py_ssize_t nargs) { Py_buffer target_buf; uint32_t seed = 0; uint64_t result[2]; MMH3_VALIDATE_ARGS_AND_SET_SEED(nargs, args, seed); GET_BUFFER_VIEW_OR_ERROUT(args[0], &target_buf); murmurhash3_x64_128(target_buf.buf, target_buf.len, seed, result); PyBuffer_Release(&target_buf); PyObject *retval = Py_BuildValue("LL", result[0], result[1]); return retval; } PyDoc_STRVAR( mmh3_mmh3_x64_128_utupledigest_doc, "mmh3_x64_128_utupledigest(key, seed=0, /) -> tuple[int, int]\n" "\n" "Return a hash for the buffer as a tuple of two 64-bit unsigned " "integers.\n" "\n" "Calculated by the MurmurHash3_x64_128 algorithm.\n" "\n" "Args:\n" " key (Buffer): The input buffer to hash.\n" " seed (int): The seed value. Must be an integer in the range\n" " [0, 0xFFFFFFFF].\n" "\n" "Returns:\n" " tuple[int, int]: The hash value as a tuple of two 64-bit unsigned\n" " integers.\n" "\n" ".. versionadded:: 5.0.0\n"); static PyObject * mmh3_mmh3_x64_128_utupledigest(PyObject *self, PyObject *const *args, Py_ssize_t nargs) { Py_buffer target_buf; uint32_t seed = 0; uint64_t result[2]; MMH3_VALIDATE_ARGS_AND_SET_SEED(nargs, args, seed); GET_BUFFER_VIEW_OR_ERROUT(args[0], &target_buf); murmurhash3_x64_128(target_buf.buf, target_buf.len, seed, result); PyBuffer_Release(&target_buf); PyObject *retval = Py_BuildValue("KK", result[0], result[1]); return retval; } PyDoc_STRVAR( mmh3_mmh3_x86_128_digest_doc, "mmh3_x86_128_digest(key, seed=0, /) -> bytes\n" "\n" "Return a 16-byte hash of the ``bytes`` type for the buffer.\n" "\n" "Calculated by the MurmurHash3_x86_128 algorithm.\n" "\n" "Args:\n" " key (Buffer): The input buffer to hash.\n" " seed (int): The seed value. 
Must be an integer in the range\n" " [0, 0xFFFFFFFF].\n" "\n" "Returns:\n" " bytes: The hash value as the ``bytes`` type with a length of\n" " 16 bytes (128 bits).\n" "\n" ".. versionadded:: 5.0.0\n"); static PyObject * mmh3_mmh3_x86_128_digest(PyObject *self, PyObject *const *args, Py_ssize_t nargs) { Py_buffer target_buf; uint32_t seed = 0; uint64_t result[2]; MMH3_VALIDATE_ARGS_AND_SET_SEED(nargs, args, seed); GET_BUFFER_VIEW_OR_ERROUT(args[0], &target_buf); murmurhash3_x86_128(target_buf.buf, target_buf.len, seed, result); PyBuffer_Release(&target_buf); #if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) result[0] = bswap_64(result[0]); result[1] = bswap_64(result[1]); #endif return PyBytes_FromStringAndSize((char *)result, MMH3_128_DIGESTSIZE); } PyDoc_STRVAR( mmh3_mmh3_x86_128_sintdigest_doc, "mmh3_x86_128_sintdigest(key, seed=0, /) -> int\n" "\n" "Return a hash for the buffer as a 128-bit signed integer.\n" "\n" "Calculated by the MurmurHash3_x86_128 algorithm.\n" "\n" "Args:\n" " key (Buffer): The input buffer to hash.\n" " seed (int): The seed value. Must be an integer in the range\n" " [0, 0xFFFFFFFF].\n" "\n" "Returns:\n" " int: The hash value as an signed 128-bit integer.\n" "\n" ".. versionadded:: 5.0.0\n"); static PyObject * mmh3_mmh3_x86_128_sintdigest(PyObject *self, PyObject *const *args, Py_ssize_t nargs) { Py_buffer target_buf; uint32_t seed = 0; uint64_t result[2]; MMH3_VALIDATE_ARGS_AND_SET_SEED(nargs, args, seed); GET_BUFFER_VIEW_OR_ERROUT(args[0], &target_buf); murmurhash3_x86_128(target_buf.buf, target_buf.len, seed, result); PyBuffer_Release(&target_buf); #if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) result[0] = bswap_64(result[0]); result[1] = bswap_64(result[1]); #endif /** * _PyLong_FromByteArray is not a part of the official Python/C API * and may be removed in the future (although it is practically stable). * cf. 
* https://mail.python.org/pipermail/python-list/2006-August/372365.html */ PyObject *retval = _PyLong_FromByteArray((unsigned char *)result, MMH3_128_DIGESTSIZE, 1, 1); return retval; } PyDoc_STRVAR( mmh3_mmh3_x86_128_uintdigest_doc, "mmh3_x86_128_uintdigest(key, seed=0, /) -> int\n" "\n" "Return a hash for the buffer as a 128-bit unsigned integer.\n" "\n" "Calculated by the MurmurHash3_x86_128 algorithm.\n" "\n" "Args:\n" " key (Buffer): The input buffer to hash.\n" " seed (int): The seed value. Must be an integer in the range\n" " [0, 0xFFFFFFFF].\n" "\n" "Returns:\n" " int: The hash value as a 128-bit unsigned integer.\n" "\n" ".. versionadded:: 5.0.0\n"); static PyObject * mmh3_mmh3_x86_128_uintdigest(PyObject *self, PyObject *const *args, Py_ssize_t nargs) { Py_buffer target_buf; uint32_t seed = 0; uint64_t result[2]; MMH3_VALIDATE_ARGS_AND_SET_SEED(nargs, args, seed); GET_BUFFER_VIEW_OR_ERROUT(args[0], &target_buf); murmurhash3_x86_128(target_buf.buf, target_buf.len, seed, result); PyBuffer_Release(&target_buf); #if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) result[0] = bswap_64(result[0]); result[1] = bswap_64(result[1]); #endif /** * _PyLong_FromByteArray is not a part of the official Python/C API * and may be removed in the future (although it is practically stable). * cf. * https://mail.python.org/pipermail/python-list/2006-August/372365.html */ PyObject *retval = _PyLong_FromByteArray((unsigned char *)result, MMH3_128_DIGESTSIZE, 1, 0); return retval; } PyDoc_STRVAR( mmh3_mmh3_x86_128_stupledigest_doc, "mmh3_x86_128_stupledigest(key, seed=0, /) -> tuple[int, int]\n" "\n" "Return a hash for the buffer as a tuple of two 64-bit signed integers.\n" "\n" "Calculated by the MurmurHash3_x86_128 algorithm.\n" "\n" "Args:\n" " key (Buffer): The input buffer to hash.\n" " seed (int): The seed value. 
Must be an integer in the range\n" " [0, 0xFFFFFFFF].\n" "\n" "Returns:\n" " tuple[int, int]: The hash value as a tuple of two 64-bit signed\n" " integers.\n" "\n" ".. versionadded:: 5.0.0\n"); static PyObject * mmh3_mmh3_x86_128_stupledigest(PyObject *self, PyObject *const *args, Py_ssize_t nargs) { Py_buffer target_buf; uint32_t seed = 0; uint64_t result[2]; MMH3_VALIDATE_ARGS_AND_SET_SEED(nargs, args, seed); GET_BUFFER_VIEW_OR_ERROUT(args[0], &target_buf); murmurhash3_x86_128(target_buf.buf, target_buf.len, seed, result); PyBuffer_Release(&target_buf); PyObject *retval = Py_BuildValue("LL", result[0], result[1]); return retval; } PyDoc_STRVAR( mmh3_mmh3_x86_128_utupledigest_doc, "mmh3_x86_128_utupledigest(key, seed=0, /) -> tuple[int, int]\n" "\n" "Return a hash for the buffer as a tuple of two 64-bit unsigned " "integers.\n" "\n" "Calculated by the MurmurHash3_x86_128 algorithm.\n" "\n" "Args:\n" " key (Buffer): The input buffer to hash.\n" " seed (int): The seed value. Must be an integer in the range\n" " [0, 0xFFFFFFFF].\n" "\n" "Returns:\n" " tuple[int, int]: The hash value as a tuple of two 64-bit unsigned\n" " integers.\n" "\n" ".. versionadded:: 5.0.0\n"); static PyObject * mmh3_mmh3_x86_128_utupledigest(PyObject *self, PyObject *const *args, Py_ssize_t nargs) { Py_buffer target_buf; uint32_t seed = 0; uint64_t result[2]; MMH3_VALIDATE_ARGS_AND_SET_SEED(nargs, args, seed); GET_BUFFER_VIEW_OR_ERROUT(args[0], &target_buf); murmurhash3_x86_128(target_buf.buf, target_buf.len, seed, result); PyBuffer_Release(&target_buf); PyObject *retval = Py_BuildValue("KK", result[0], result[1]); return retval; } // Casting to PyCFunction is mandatory for // METH_VARARGS | METH_KEYWORDS functions. 
// See // https://docs.python.org/3/extending/extending.html#keyword-parameters-for-extension-functions static PyMethodDef Mmh3Methods[] = { {"hash", (PyCFunction)mmh3_hash, METH_FASTCALL | METH_KEYWORDS, mmh3_hash_doc}, {"hash_from_buffer", (PyCFunction)mmh3_hash_from_buffer, METH_VARARGS | METH_KEYWORDS, mmh3_hash_from_buffer_doc}, {"hash64", (PyCFunction)mmh3_hash64, METH_FASTCALL | METH_KEYWORDS, mmh3_hash64_doc}, {"hash128", (PyCFunction)mmh3_hash128, METH_FASTCALL | METH_KEYWORDS, mmh3_hash128_doc}, {"hash_bytes", (PyCFunction)mmh3_hash_bytes, METH_FASTCALL | METH_KEYWORDS, mmh3_hash_bytes_doc}, {"mmh3_32_digest", (PyCFunction)mmh3_mmh3_32_digest, METH_FASTCALL, mmh3_mmh3_32_digest_doc}, {"mmh3_32_sintdigest", (PyCFunction)mmh3_mmh3_32_sintdigest, METH_FASTCALL, mmh3_mmh3_32_sintdigest_doc}, {"mmh3_32_uintdigest", (PyCFunction)mmh3_mmh3_32_uintdigest, METH_FASTCALL, mmh3_mmh3_32_uintdigest_doc}, {"mmh3_x64_128_digest", (PyCFunction)mmh3_mmh3_x64_128_digest, METH_FASTCALL, mmh3_mmh3_x64_128_digest_doc}, {"mmh3_x64_128_sintdigest", (PyCFunction)mmh3_mmh3_x64_128_sintdigest, METH_FASTCALL, mmh3_mmh3_x64_128_sintdigest_doc}, {"mmh3_x64_128_uintdigest", (PyCFunction)mmh3_mmh3_x64_128_uintdigest, METH_FASTCALL, mmh3_mmh3_x64_128_uintdigest_doc}, {"mmh3_x64_128_stupledigest", (PyCFunction)mmh3_mmh3_x64_128_stupledigest, METH_FASTCALL, mmh3_mmh3_x64_128_stupledigest_doc}, {"mmh3_x64_128_utupledigest", (PyCFunction)mmh3_mmh3_x64_128_utupledigest, METH_FASTCALL, mmh3_mmh3_x64_128_utupledigest_doc}, {"mmh3_x86_128_digest", (PyCFunction)mmh3_mmh3_x86_128_digest, METH_FASTCALL, mmh3_mmh3_x86_128_digest_doc}, {"mmh3_x86_128_sintdigest", (PyCFunction)mmh3_mmh3_x86_128_sintdigest, METH_FASTCALL, mmh3_mmh3_x86_128_sintdigest_doc}, {"mmh3_x86_128_uintdigest", (PyCFunction)mmh3_mmh3_x86_128_uintdigest, METH_FASTCALL, mmh3_mmh3_x86_128_uintdigest_doc}, {"mmh3_x86_128_stupledigest", (PyCFunction)mmh3_mmh3_x86_128_stupledigest, METH_FASTCALL, mmh3_mmh3_x86_128_stupledigest_doc}, 
{"mmh3_x86_128_utupledigest", (PyCFunction)mmh3_mmh3_x86_128_utupledigest, METH_FASTCALL, mmh3_mmh3_x86_128_utupledigest_doc}, {NULL, NULL, 0, NULL}}; //----------------------------------------------------------------------------- // Hasher classes // // The design of hasher classes are loosely based on the Google Guava // implementation (Java) //----------------------------------------------------------------------------- // Hasher for murmurhash3_x86_32 typedef struct { PyObject_HEAD uint32_t h; uint64_t buffer; uint8_t shift; Py_ssize_t length; #ifdef Py_GIL_DISABLED PyMutex mutex; #endif } MMH3Hasher32; static PyTypeObject MMH3Hasher32Type; static FORCE_INLINE void update32_impl(MMH3Hasher32 *self, Py_buffer *buf) { Py_ssize_t i = 0; uint32_t h1 = 0; uint32_t k1 = 0; const uint32_t c1 = 0xe6546b64; const uint64_t mask = 0xffffffffUL; MMH3_HASHER_LOCK(self); h1 = self->h; for (; i + 4 <= buf->len; i += 4) { k1 = getblock32(buf->buf, i / 4); self->buffer |= (k1 & mask) << self->shift; self->length += 4; h1 ^= mixK1(self->buffer); h1 = mixH1(h1, 0, 13, c1); self->buffer >>= 32; } for (; i < buf->len; i++) { k1 = ((uint8_t *)buf->buf)[i]; self->buffer |= (k1 & mask) << self->shift; self->shift += 8; self->length += 1; if (self->shift >= 32) { h1 ^= mixK1(self->buffer); h1 = mixH1(h1, 0, 13, c1); self->buffer >>= 32; self->shift -= 32; } } self->h = h1; MMH3_HASHER_UNLOCK(self); PyBuffer_Release(buf); return; } static void MMH3Hasher32_dealloc(MMH3Hasher32 *self) { Py_TYPE(self)->tp_free((PyObject *)self); } static PyObject * MMH3Hasher32_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { MMH3Hasher32 *self; self = (MMH3Hasher32 *)type->tp_alloc(type, 0); if (self != NULL) { self->h = 0; self->buffer = 0; self->shift = 0; self->length = 0; MMH3_HASHER_INIT_MUTEX(self); } return (PyObject *)self; } /* It is impossible to add docstring for __init__ in Python C extension. Therefore, the constructor docstring should be described in the class docstring. 
See also https://stackoverflow.com/q/11913492 */ static int MMH3Hasher32_init(MMH3Hasher32 *self, PyObject *args, PyObject *kwds) { Py_buffer target_buf = {0}; long long seed = 0; static char *kwlist[] = {"data", "seed", NULL}; if (!PyArg_ParseTupleAndKeywords(args, kwds, "|y*L", kwlist, &target_buf, &seed)) return -1; MMH3_VALIDATE_SEED_RETURN_INT(seed, target_buf); self->h = (uint32_t)seed; if (target_buf.buf != NULL) { // target_buf will be released in update32_impl update32_impl(self, &target_buf); } return 0; } PyDoc_STRVAR( MMH3Hasher_update_doc, "update(data)\n" "\n" "Update this hash object's state with the provided bytes-like object.\n" "\n" "Args:\n" " data (Buffer): The buffer to hash.\n"); static PyObject * MMH3Hasher32_update(MMH3Hasher32 *self, PyObject *obj) { Py_buffer buf; GET_BUFFER_VIEW_OR_ERROUT(obj, &buf); // buf will be released in update32_impl update32_impl(self, &buf); Py_RETURN_NONE; } static FORCE_INLINE uint32_t digest32_impl(uint32_t h, uint64_t k1, Py_ssize_t length) { h ^= mixK1(k1); h ^= length; h = fmix32(h); return h; } PyDoc_STRVAR(MMH3Hasher_digest_doc, "digest() -> bytes\n" "\n" "Return the digest value as a ``bytes`` object.\n" "\n" "Returns:\n" " bytes: The digest value.\n"); static PyObject * MMH3Hasher32_digest(MMH3Hasher32 *self, PyObject *Py_UNUSED(ignored)) { MMH3_HASHER_LOCK(self); uint32_t h = digest32_impl(self->h, self->buffer, self->length); MMH3_HASHER_UNLOCK(self); char out[MMH3_32_DIGESTSIZE]; #if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) ((uint32_t *)out)[0] = bswap_32(h); #else ((uint32_t *)out)[0] = h; #endif return PyBytes_FromStringAndSize(out, MMH3_32_DIGESTSIZE); } PyDoc_STRVAR(MMH3Hasher_sintdigest_doc, "sintdigest() -> int\n" "\n" "Return the digest value as a signed integer.\n" "\n" "Returns:\n" " int: The digest value as a signed integer.\n"); static PyObject * MMH3Hasher32_sintdigest(MMH3Hasher32 *self, PyObject *Py_UNUSED(ignored)) { MMH3_HASHER_LOCK(self); uint32_t h = 
digest32_impl(self->h, self->buffer, self->length); MMH3_HASHER_UNLOCK(self); // Note that simple casting ("(int32_t) h") is an undefined behavior int32_t result = *(int32_t *)&h; return PyLong_FromLong(result); } PyDoc_STRVAR(MMH3Hasher_uintdigest_doc, "uintdigest() -> int\n" "\n" "Return the digest value as an unsigned integer.\n" "\n" "Returns:\n" " int: The digest value as an unsigned integer.\n"); static PyObject * MMH3Hasher32_uintdigest(MMH3Hasher32 *self, PyObject *Py_UNUSED(ignored)) { MMH3_HASHER_LOCK(self); uint32_t h = digest32_impl(self->h, self->buffer, self->length); MMH3_HASHER_UNLOCK(self); return PyLong_FromUnsignedLong(h); } PyDoc_STRVAR(MMH3Hasher32_copy_doc, "copy() -> mmh3_32\n" "\n" "Return a copy of the hash object..\n" "\n" "Returns:\n" " mmh3_32: A copy of this hash object.\n"); static PyObject * MMH3Hasher32_copy(MMH3Hasher32 *self, PyObject *Py_UNUSED(ignored)) { MMH3Hasher32 *p; if ((p = PyObject_New(MMH3Hasher32, &MMH3Hasher32Type)) == NULL) { return NULL; } MMH3_HASHER_LOCK(self); p->h = self->h; p->buffer = self->buffer; p->shift = self->shift; p->length = self->length; MMH3_HASHER_INIT_MUTEX(p); MMH3_HASHER_UNLOCK(self); return (PyObject *)p; } static PyMethodDef MMH3Hasher32_methods[] = { {"update", (PyCFunction)MMH3Hasher32_update, METH_O, MMH3Hasher_update_doc}, { "digest", (PyCFunction)MMH3Hasher32_digest, METH_NOARGS, MMH3Hasher_digest_doc, }, {"sintdigest", (PyCFunction)MMH3Hasher32_sintdigest, METH_NOARGS, MMH3Hasher_sintdigest_doc}, {"uintdigest", (PyCFunction)MMH3Hasher32_uintdigest, METH_NOARGS, MMH3Hasher_uintdigest_doc}, {"copy", (PyCFunction)MMH3Hasher32_copy, METH_NOARGS, MMH3Hasher32_copy_doc}, {NULL} /* Sentinel */ }; static PyObject * MMH3Hasher32_get_digest_size(PyObject *self, void *closure) { return PyLong_FromLong(MMH3_32_DIGESTSIZE); } static PyObject * MMH3Hasher32_get_block_size(PyObject *self, void *closure) { return PyLong_FromLong(MMH3_32_BLOCKSIZE); } static PyObject * MMH3Hasher32_get_name(PyObject 
*self, void *closure) { return PyUnicode_FromStringAndSize("mmh3_32", 7); } static PyGetSetDef MMH3Hasher32_getsetters[] = { {"digest_size", (getter)MMH3Hasher32_get_digest_size, NULL, "int: Number of bytes in this hashes output", NULL}, {"block_size", (getter)MMH3Hasher32_get_block_size, NULL, "int: Number of bytes of the internal block of this algorithm", NULL}, {"name", (getter)MMH3Hasher32_get_name, NULL, "str: The hash algorithm being used by this object", NULL}, {NULL} /* Sentinel */ }; PyDoc_STRVAR( MMH3Hasher32Type_doc, "__init__(data=None, seed=0)\n" "\n" "Hasher for incrementally calculating the murmurhash3_x86_32 hash.\n" "\n" "Args:\n" " data (Buffer | None): The initial data to hash.\n" " seed (int): The seed value. Must be an integer in the range\n" " [0, 0xFFFFFFFF].\n" "\n" ".. versionchanged:: 5.2.0\n" " Experimental no-GIL support; thread safety not fully verified.\n" "\n" ".. versionchanged:: 5.0.0\n" " Added the optional ``data`` parameter as the first argument.\n" " The ``seed`` argument is now strictly checked for valid range.\n"); static PyTypeObject MMH3Hasher32Type = { PyVarObject_HEAD_INIT(NULL, 0).tp_name = "mmh3.mmh3_32", .tp_doc = MMH3Hasher32Type_doc, .tp_basicsize = sizeof(MMH3Hasher32), .tp_itemsize = 0, .tp_flags = Py_TPFLAGS_DEFAULT, .tp_new = MMH3Hasher32_new, .tp_init = (initproc)MMH3Hasher32_init, .tp_dealloc = (destructor)MMH3Hasher32_dealloc, .tp_methods = MMH3Hasher32_methods, .tp_getset = MMH3Hasher32_getsetters, }; //----------------------------------------------------------------------------- // Hasher for murmurhash3_x64_128 typedef struct { PyObject_HEAD uint64_t h1; uint64_t h2; uint64_t buffer1; uint64_t buffer2; uint8_t shift; Py_ssize_t length; #ifdef Py_GIL_DISABLED PyMutex mutex; #endif } MMH3Hasher128x64; static PyTypeObject MMH3Hasher128x64Type; static FORCE_INLINE void update_x64_128_impl(MMH3Hasher128x64 *self, Py_buffer *buf) { Py_ssize_t i = 0; uint64_t h1 = 0; uint64_t h2 = 0; uint64_t k1 = 0; uint64_t k2 = 
0; MMH3_HASHER_LOCK(self); h1 = self->h1; h2 = self->h2; for (; i + 16 <= buf->len; i += 16) { k1 = getblock64(buf->buf, (i / 16) * 2); k2 = getblock64(buf->buf, (i / 16) * 2 + 1); if (self->shift == 0) { // TODO: use bit ops self->buffer1 = k1; self->buffer2 = k2; } else if (self->shift < 64) { self->buffer1 |= k1 << self->shift; self->buffer2 = (k1 >> (64 - self->shift)) | (k2 << self->shift); } else if (self->shift == 64) { self->buffer2 = k1; } else { self->buffer2 |= k1 << (self->shift - 64); } h1 ^= mixK1_x64_128(self->buffer1); h1 = mixH_x64_128(h1, h2, 27, 0x52dce729UL); h2 ^= mixK2_x64_128(self->buffer2); h2 = mixH_x64_128(h2, h1, 31, 0x38495ab5UL); self->length += 16; if (self->shift == 0) { // TODO: use bit ops self->buffer1 = 0; self->buffer2 = 0; } else if (self->shift < 64) { self->buffer1 = k2 >> (64 - self->shift); self->buffer2 = 0; } else if (self->shift == 64) { self->buffer1 = k2; self->buffer2 = 0; } else { self->buffer1 = k1 >> (128 - self->shift) | (k2 << (self->shift - 64)); self->buffer2 = k2 >> (128 - self->shift); } } for (; i < buf->len; i++) { k1 = ((uint8_t *)buf->buf)[i]; if (self->shift < 64) { // TODO: use bit ops self->buffer1 |= k1 << self->shift; } else { self->buffer2 |= k1 << (self->shift - 64); } self->shift += 8; self->length += 1; if (self->shift >= 128) { h1 ^= mixK1_x64_128(self->buffer1); h1 = mixH_x64_128(h1, h2, 27, 0x52dce729UL); h2 ^= mixK2_x64_128(self->buffer2); h2 = mixH_x64_128(h2, h1, 31, 0x38495ab5UL); self->buffer1 = 0; self->buffer2 = 0; self->shift -= 128; } } self->h1 = h1; self->h2 = h2; MMH3_HASHER_UNLOCK(self); PyBuffer_Release(buf); } static void MMH3Hasher128x64_dealloc(MMH3Hasher128x64 *self) { Py_TYPE(self)->tp_free((PyObject *)self); } static PyObject * MMH3Hasher128x64_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { MMH3Hasher128x64 *self; self = (MMH3Hasher128x64 *)type->tp_alloc(type, 0); if (self != NULL) { self->h1 = 0; self->h2 = 0; self->buffer1 = 0; self->buffer2 = 0; self->shift = 
0; self->length = 0; MMH3_HASHER_INIT_MUTEX(self); } return (PyObject *)self; } static int MMH3Hasher128x64_init(MMH3Hasher128x64 *self, PyObject *args, PyObject *kwds) { Py_buffer target_buf = {0}; long long seed = 0; static char *kwlist[] = {"data", "seed", NULL}; if (!PyArg_ParseTupleAndKeywords(args, kwds, "|y*L", kwlist, &target_buf, &seed)) return -1; MMH3_VALIDATE_SEED_RETURN_INT(seed, target_buf); self->h1 = (uint64_t)seed; self->h2 = self->h1; if (target_buf.buf != NULL) { // target_buf will be released in update_x64_128_impl update_x64_128_impl(self, &target_buf); } return 0; } static PyObject * MMH3Hasher128x64_update(MMH3Hasher128x64 *self, PyObject *obj) { Py_buffer buf; GET_BUFFER_VIEW_OR_ERROUT(obj, &buf); // buf will be released in update_x64_128_impl update_x64_128_impl(self, &buf); Py_RETURN_NONE; } static PyObject * MMH3Hasher128x64_digest(MMH3Hasher128x64 *self, PyObject *Py_UNUSED(ignored)) { const char out[MMH3_128_DIGESTSIZE]; MMH3_HASHER_LOCK(self); digest_x64_128_impl(self->h1, self->h2, self->buffer1, self->buffer2, self->length, out); MMH3_HASHER_UNLOCK(self); return PyBytes_FromStringAndSize(out, MMH3_128_DIGESTSIZE); } static PyObject * MMH3Hasher128x64_sintdigest(MMH3Hasher128x64 *self, PyObject *Py_UNUSED(ignored)) { const char out[MMH3_128_DIGESTSIZE]; MMH3_HASHER_LOCK(self); digest_x64_128_impl(self->h1, self->h2, self->buffer1, self->buffer2, self->length, out); MMH3_HASHER_UNLOCK(self); const int little_endian = 1; const int is_signed = 1; /** * _PyLong_FromByteArray is not a part of the official Python/C API * and may be removed in the future (although it is practically stable). * cf. 
* https://mail.python.org/pipermail/python-list/2006-August/372365.html */ PyObject *retval = _PyLong_FromByteArray( (unsigned char *)out, MMH3_128_DIGESTSIZE, little_endian, is_signed); return retval; } static PyObject * MMH3Hasher128x64_uintdigest(MMH3Hasher128x64 *self, PyObject *Py_UNUSED(ignored)) { const char out[MMH3_128_DIGESTSIZE]; MMH3_HASHER_LOCK(self); digest_x64_128_impl(self->h1, self->h2, self->buffer1, self->buffer2, self->length, out); MMH3_HASHER_UNLOCK(self); const int little_endian = 1; const int is_signed = 0; /** * _PyLong_FromByteArray is not a part of the official Python/C API * and may be removed in the future (although it is practically stable). * cf. * https://mail.python.org/pipermail/python-list/2006-August/372365.html */ PyObject *retval = _PyLong_FromByteArray( (unsigned char *)out, MMH3_128_DIGESTSIZE, little_endian, is_signed); return retval; } PyDoc_STRVAR(MMH3Hasher128_stupledigest_doc, "stupledigest() -> tuple[int, int]\n" "\n" "Return the digest value as a tuple of two signed integers.\n" "\n" "Returns:\n" " tuple[int, int]: The digest value as a tuple of two signed\n" " integers.\n"); static PyObject * MMH3Hasher128x64_stupledigest(MMH3Hasher128x64 *self, PyObject *Py_UNUSED(ignored)) { const char out[MMH3_128_DIGESTSIZE]; MMH3_HASHER_LOCK(self); digest_x64_128_impl(self->h1, self->h2, self->buffer1, self->buffer2, self->length, out); MMH3_HASHER_UNLOCK(self); const char *valflag = "LL"; uint64_t result1 = ((uint64_t *)out)[0]; uint64_t result2 = ((uint64_t *)out)[1]; #if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) result1 = bswap_64(result1); result2 = bswap_64(result2); #endif return Py_BuildValue(valflag, result1, result2); } PyDoc_STRVAR( MMH3Hasher128_utupledigest_doc, "utupledigest() -> tuple[int, int]\n" "\n" "Return the digest value as a tuple of two unsigned integers.\n" "\n" "Returns:\n" " tuple[int, int]: The digest value as a tuple of two unsigned\n" " integers.\n"); static PyObject * 
MMH3Hasher128x64_utupledigest(MMH3Hasher128x64 *self, PyObject *Py_UNUSED(ignored)) { const char out[MMH3_128_DIGESTSIZE]; MMH3_HASHER_LOCK(self); digest_x64_128_impl(self->h1, self->h2, self->buffer1, self->buffer2, self->length, out); MMH3_HASHER_UNLOCK(self); const char *valflag = "KK"; uint64_t result1 = ((uint64_t *)out)[0]; uint64_t result2 = ((uint64_t *)out)[1]; #if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) result1 = bswap_64(result1); result2 = bswap_64(result2); #endif return Py_BuildValue(valflag, result1, result2); } PyDoc_STRVAR(MMH3Hasher128x64_copy_doc, "copy() -> mmh3_128x64\n" "\n" "Return a copy of the hash object..\n" "\n" "Returns:\n" " mmh3_128x64: A copy of this hash object.\n"); static PyObject * MMH3Hasher128x64_copy(MMH3Hasher128x64 *self, PyObject *Py_UNUSED(ignored)) { MMH3Hasher128x64 *p; if ((p = PyObject_New(MMH3Hasher128x64, &MMH3Hasher128x64Type)) == NULL) { return NULL; } MMH3_HASHER_LOCK(self); p->h1 = self->h1; p->h2 = self->h2; p->buffer1 = self->buffer1; p->buffer2 = self->buffer2; p->shift = self->shift; p->length = self->length; MMH3_HASHER_INIT_MUTEX(p); MMH3_HASHER_UNLOCK(self); return (PyObject *)p; } static PyMethodDef MMH3Hasher128x64_methods[] = { {"update", (PyCFunction)MMH3Hasher128x64_update, METH_O, MMH3Hasher_update_doc}, {"digest", (PyCFunction)MMH3Hasher128x64_digest, METH_NOARGS, MMH3Hasher_digest_doc}, {"sintdigest", (PyCFunction)MMH3Hasher128x64_sintdigest, METH_NOARGS, MMH3Hasher_sintdigest_doc}, {"uintdigest", (PyCFunction)MMH3Hasher128x64_uintdigest, METH_NOARGS, MMH3Hasher_uintdigest_doc}, {"stupledigest", (PyCFunction)MMH3Hasher128x64_stupledigest, METH_NOARGS, MMH3Hasher128_stupledigest_doc}, {"utupledigest", (PyCFunction)MMH3Hasher128x64_utupledigest, METH_NOARGS, MMH3Hasher128_utupledigest_doc}, {"copy", (PyCFunction)MMH3Hasher128x64_copy, METH_NOARGS, MMH3Hasher128x64_copy_doc}, {NULL} /* Sentinel */ }; static PyObject * MMH3Hasher128x64_get_digest_size(PyObject *self, void 
*closure) { return PyLong_FromLong(MMH3_128_DIGESTSIZE); } static PyObject * MMH3Hasher128x64_get_block_size(PyObject *self, void *closure) { return PyLong_FromLong(MMH3_128_BLOCKSIZE); } static PyObject * MMH3Hasher128x64_get_name(PyObject *self, void *closure) { return PyUnicode_FromStringAndSize("mmh3_x64_128", 12); } static PyGetSetDef MMH3Hasher128x64_getsetters[] = { {"digest_size", (getter)MMH3Hasher128x64_get_digest_size, NULL, "int: Number of bytes in this hashes output.", NULL}, {"block_size", (getter)MMH3Hasher128x64_get_block_size, NULL, "int: Number of bytes of the internal block of this algorithm.", NULL}, {"name", (getter)MMH3Hasher128x64_get_name, NULL, "str: The hash algorithm being used by this object.", NULL}, {NULL} /* Sentinel */ }; PyDoc_STRVAR( MMH3Hasher128x64Type_doc, "__init__(data=None, seed=0)\n" "\n" "Hasher for incrementally calculating the murmurhash3_x64_128 hash.\n" "\n" "Args:\n" " data (Buffer | None): The initial data to hash.\n" " seed (int): The seed value. Must be an integer in the range\n" " [0, 0xFFFFFFFF].\n" "\n" ".. versionchanged:: 5.2.0\n" " Experimental no-GIL support; thread safety not fully verified.\n" "\n" ".. 
versionchanged:: 5.0.0\n" " Added the optional ``data`` parameter as the first argument.\n" " The ``seed`` argument is now strictly checked for valid range.\n"); static PyTypeObject MMH3Hasher128x64Type = { PyVarObject_HEAD_INIT(NULL, 0).tp_name = "mmh3.mmh3_x64_128", .tp_doc = MMH3Hasher128x64Type_doc, .tp_basicsize = sizeof(MMH3Hasher128x64), .tp_itemsize = 0, .tp_flags = Py_TPFLAGS_DEFAULT, .tp_new = MMH3Hasher128x64_new, .tp_init = (initproc)MMH3Hasher128x64_init, .tp_dealloc = (destructor)MMH3Hasher128x64_dealloc, .tp_methods = MMH3Hasher128x64_methods, .tp_getset = MMH3Hasher128x64_getsetters, }; //----------------------------------------------------------------------------- // Hasher for murmurhash3_x86_128 typedef struct { PyObject_HEAD uint32_t h1; uint32_t h2; uint32_t h3; uint32_t h4; uint32_t buffer1; uint32_t buffer2; uint32_t buffer3; uint32_t buffer4; uint8_t shift; Py_ssize_t length; #ifdef Py_GIL_DISABLED PyMutex mutex; #endif } MMH3Hasher128x86; static PyTypeObject MMH3Hasher128x86Type; static FORCE_INLINE void update_x86_128_impl(MMH3Hasher128x86 *self, Py_buffer *buf) { Py_ssize_t i = 0; uint32_t h1 = 0; uint32_t h2 = 0; uint32_t h3 = 0; uint32_t h4 = 0; uint32_t k1 = 0; MMH3_HASHER_LOCK(self); h1 = self->h1; h2 = self->h2; h3 = self->h3; h4 = self->h4; for (; i < buf->len; i++) { k1 = ((uint8_t *)buf->buf)[i]; if (self->shift < 32) { // TODO: use bit ops self->buffer1 |= k1 << self->shift; } else if (self->shift < 64) { self->buffer2 |= k1 << (self->shift - 32); } else if (self->shift < 96) { self->buffer3 |= k1 << (self->shift - 64); } else { self->buffer4 |= k1 << (self->shift - 96); } self->shift += 8; self->length += 1; if (self->shift >= 128) { const uint32_t c1 = 0x239b961b; const uint32_t c2 = 0xab0e9789; const uint32_t c3 = 0x38b34ae5; const uint32_t c4 = 0xa1e38b93; h1 ^= mixK_x86_128(self->buffer1, 15, c1, c2); h1 = mixH1(h1, h2, 19, 0x561ccd1bUL); h2 ^= mixK_x86_128(self->buffer2, 16, c2, c3); h2 = mixH1(h2, h3, 17, 0x0bcaa747UL); h3 
^= mixK_x86_128(self->buffer3, 17, c3, c4); h3 = mixH1(h3, h4, 15, 0x96cd1c35UL); h4 ^= mixK_x86_128(self->buffer4, 18, c4, c1); h4 = mixH1(h4, h1, 13, 0x32ac3b17UL); self->buffer1 = 0; self->buffer2 = 0; self->buffer3 = 0; self->buffer4 = 0; self->shift -= 128; } } self->h1 = h1; self->h2 = h2; self->h3 = h3; self->h4 = h4; MMH3_HASHER_UNLOCK(self); PyBuffer_Release(buf); } static void MMH3Hasher128x86_dealloc(MMH3Hasher128x86 *self) { Py_TYPE(self)->tp_free((PyObject *)self); } static PyObject * MMH3Hasher128x86_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { MMH3Hasher128x86 *self; self = (MMH3Hasher128x86 *)type->tp_alloc(type, 0); if (self != NULL) { self->h1 = 0; self->h2 = 0; self->h3 = 0; self->h4 = 0; self->buffer1 = 0; self->buffer2 = 0; self->buffer3 = 0; self->buffer4 = 0; self->shift = 0; self->length = 0; MMH3_HASHER_INIT_MUTEX(self); } return (PyObject *)self; } static int MMH3Hasher128x86_init(MMH3Hasher128x86 *self, PyObject *args, PyObject *kwds) { Py_buffer target_buf = {0}; long long seed = 0; static char *kwlist[] = {"data", "seed", NULL}; if (!PyArg_ParseTupleAndKeywords(args, kwds, "|y*L", kwlist, &target_buf, &seed)) return -1; MMH3_VALIDATE_SEED_RETURN_INT(seed, target_buf); self->h1 = (uint32_t)seed; self->h2 = self->h1; self->h3 = self->h1; self->h4 = self->h1; if (target_buf.buf != NULL) { // target_buf will be released in update_x86_128_impl update_x86_128_impl(self, &target_buf); } return 0; } static PyObject * MMH3Hasher128x86_update(MMH3Hasher128x86 *self, PyObject *obj) { Py_buffer buf; GET_BUFFER_VIEW_OR_ERROUT(obj, &buf); // buf will be released in update_x86_128_impl update_x86_128_impl(self, &buf); Py_RETURN_NONE; } static PyObject * MMH3Hasher128x86_digest(MMH3Hasher128x86 *self, PyObject *Py_UNUSED(ignored)) { char out[MMH3_128_DIGESTSIZE]; MMH3_HASHER_LOCK(self); digest_x86_128_impl(self->h1, self->h2, self->h3, self->h4, self->buffer1, self->buffer2, self->buffer3, self->buffer4, self->length, out); 
MMH3_HASHER_UNLOCK(self); return PyBytes_FromStringAndSize(out, MMH3_128_DIGESTSIZE); } static PyObject * MMH3Hasher128x86_sintdigest(MMH3Hasher128x86 *self, PyObject *Py_UNUSED(ignored)) { const char out[MMH3_128_DIGESTSIZE]; MMH3_HASHER_LOCK(self); digest_x86_128_impl(self->h1, self->h2, self->h3, self->h4, self->buffer1, self->buffer2, self->buffer3, self->buffer4, self->length, out); MMH3_HASHER_UNLOCK(self); const int little_endian = 1; const int is_signed = 1; /** * _PyLong_FromByteArray is not a part of the official Python/C API * and may be removed in the future (although it is practically stable). * cf. * https://mail.python.org/pipermail/python-list/2006-August/372365.html */ PyObject *retval = _PyLong_FromByteArray( (unsigned char *)out, MMH3_128_DIGESTSIZE, little_endian, is_signed); return retval; } static PyObject * MMH3Hasher128x86_uintdigest(MMH3Hasher128x86 *self, PyObject *Py_UNUSED(ignored)) { const char out[MMH3_128_DIGESTSIZE]; MMH3_HASHER_LOCK(self); digest_x86_128_impl(self->h1, self->h2, self->h3, self->h4, self->buffer1, self->buffer2, self->buffer3, self->buffer4, self->length, out); MMH3_HASHER_UNLOCK(self); const int little_endian = 1; const int is_signed = 0; /** * _PyLong_FromByteArray is not a part of the official Python/C API * and may be removed in the future (although it is practically stable). * cf. 
* https://mail.python.org/pipermail/python-list/2006-August/372365.html */ PyObject *retval = _PyLong_FromByteArray( (unsigned char *)out, MMH3_128_DIGESTSIZE, little_endian, is_signed); return retval; } static PyObject * MMH3Hasher128x86_stupledigest(MMH3Hasher128x86 *self, PyObject *Py_UNUSED(ignored)) { const char out[MMH3_128_DIGESTSIZE]; MMH3_HASHER_LOCK(self); digest_x86_128_impl(self->h1, self->h2, self->h3, self->h4, self->buffer1, self->buffer2, self->buffer3, self->buffer4, self->length, out); MMH3_HASHER_UNLOCK(self); const char *valflag = "LL"; uint64_t result1 = ((uint64_t *)out)[0]; uint64_t result2 = ((uint64_t *)out)[1]; #if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) result1 = bswap_64(result1); result2 = bswap_64(result2); #endif return Py_BuildValue(valflag, result1, result2); } static PyObject * MMH3Hasher128x86_utupledigest(MMH3Hasher128x86 *self, PyObject *Py_UNUSED(ignored)) { const char out[MMH3_128_DIGESTSIZE]; MMH3_HASHER_LOCK(self); digest_x86_128_impl(self->h1, self->h2, self->h3, self->h4, self->buffer1, self->buffer2, self->buffer3, self->buffer4, self->length, out); MMH3_HASHER_UNLOCK(self); const char *valflag = "KK"; uint64_t result1 = ((uint64_t *)out)[0]; uint64_t result2 = ((uint64_t *)out)[1]; #if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) result1 = bswap_64(result1); result2 = bswap_64(result2); #endif return Py_BuildValue(valflag, result1, result2); } PyDoc_STRVAR(MMH3Hasher128x86_copy_doc, "copy() -> mmh3_128x86\n" "\n" "Return a copy of the hash object..\n" "\n" "Returns:\n" " mmh3_128x86: A copy of this hash object.\n"); static PyObject * MMH3Hasher128x86_copy(MMH3Hasher128x86 *self, PyObject *Py_UNUSED(ignored)) { MMH3Hasher128x86 *p; if ((p = PyObject_New(MMH3Hasher128x86, &MMH3Hasher128x86Type)) == NULL) { return NULL; } MMH3_HASHER_LOCK(self); p->h1 = self->h1; p->h2 = self->h2; p->h3 = self->h3; p->h4 = self->h4; p->buffer1 = self->buffer1; p->buffer2 = self->buffer2; 
p->buffer3 = self->buffer3; p->buffer4 = self->buffer4; p->shift = self->shift; p->length = self->length; MMH3_HASHER_INIT_MUTEX(p); MMH3_HASHER_UNLOCK(self); return (PyObject *)p; } static PyMethodDef MMH3Hasher128x86_methods[] = { {"update", (PyCFunction)MMH3Hasher128x86_update, METH_O, MMH3Hasher_update_doc}, {"digest", (PyCFunction)MMH3Hasher128x86_digest, METH_NOARGS, MMH3Hasher_digest_doc}, {"sintdigest", (PyCFunction)MMH3Hasher128x86_sintdigest, METH_NOARGS, MMH3Hasher_sintdigest_doc}, {"uintdigest", (PyCFunction)MMH3Hasher128x86_uintdigest, METH_NOARGS, MMH3Hasher_uintdigest_doc}, {"stupledigest", (PyCFunction)MMH3Hasher128x86_stupledigest, METH_NOARGS, MMH3Hasher128_stupledigest_doc}, {"utupledigest", (PyCFunction)MMH3Hasher128x86_utupledigest, METH_NOARGS, MMH3Hasher128_utupledigest_doc}, {"copy", (PyCFunction)MMH3Hasher128x86_copy, METH_NOARGS, MMH3Hasher128x86_copy_doc}, {NULL} /* Sentinel */ }; static PyObject * MMH3Hasher128x86_get_digest_size(PyObject *self, void *closure) { return PyLong_FromLong(MMH3_128_DIGESTSIZE); } static PyObject * MMH3Hasher128x86_get_block_size(PyObject *self, void *closure) { return PyLong_FromLong(MMH3_128_BLOCKSIZE); } static PyObject * MMH3Hasher128x86_get_name(PyObject *self, void *closure) { return PyUnicode_FromStringAndSize("mmh3_x86_128", 12); } static PyGetSetDef MMH3Hasher128x86_getsetters[] = { {"digest_size", (getter)MMH3Hasher128x86_get_digest_size, NULL, "int: Number of bytes in this hashes output", NULL}, {"block_size", (getter)MMH3Hasher128x86_get_block_size, NULL, "int: Number of bytes of the internal block of this algorithm", NULL}, {"name", (getter)MMH3Hasher128x86_get_name, NULL, "str: The hash algorithm being used by this object", NULL}, {NULL} /* Sentinel */ }; PyDoc_STRVAR( MMH3Hasher128x86Type_doc, "__init__(data=None, seed=0)\n" "\n" "Hasher for incrementally calculating the murmurhash3_x86_128 hash.\n" "\n" "Args:\n" " data (Buffer | None): The initial data to hash.\n" " seed (int): The seed value. 
Must be an integer in the range " "[0, 0xFFFFFFFF].\n" "\n" ".. versionchanged:: 5.2.0\n" " Experimental no-GIL support; thread safety not fully verified.\n" "\n" ".. versionchanged:: 5.0.0\n" " Added the optional ``data`` parameter as the first argument.\n" " The ``seed`` argument is now strictly checked for valid range.\n"); static PyTypeObject MMH3Hasher128x86Type = { PyVarObject_HEAD_INIT(NULL, 0).tp_name = "mmh3.mmh3_x86_128", .tp_doc = MMH3Hasher128x86Type_doc, .tp_basicsize = sizeof(MMH3Hasher128x86), .tp_itemsize = 0, .tp_flags = Py_TPFLAGS_DEFAULT, .tp_new = MMH3Hasher128x86_new, .tp_init = (initproc)MMH3Hasher128x86_init, .tp_dealloc = (destructor)MMH3Hasher128x86_dealloc, .tp_methods = MMH3Hasher128x86_methods, .tp_getset = MMH3Hasher128x86_getsetters, }; //----------------------------------------------------------------------------- // Module static struct PyModuleDef mmh3module = { PyModuleDef_HEAD_INIT, "mmh3", "A Python front-end to MurmurHash3.\n" "\n" "A Python front-end to MurmurHash3, " "a fast and robust non-cryptographic hash library " "created by Austin Appleby (http://code.google.com/p/smhasher/).\n" "\n" "Ported by Hajime Senuma . 
" "If you find any bugs, please submit an issue via " "https://github.com/hajimes/mmh3.\n" "\n" "Typical usage example:\n" "\n" " mmh3.hash(\"foobar\", 42)", -1, Mmh3Methods, NULL, NULL, NULL, NULL}; PyMODINIT_FUNC PyInit_mmh3(void) { if (PyType_Ready(&MMH3Hasher32Type) < 0) return NULL; if (PyType_Ready(&MMH3Hasher128x64Type) < 0) return NULL; if (PyType_Ready(&MMH3Hasher128x86Type) < 0) return NULL; PyObject *module = PyModule_Create(&mmh3module); if (module == NULL) return NULL; #ifdef Py_GIL_DISABLED PyUnstable_Module_SetGIL(module, Py_MOD_GIL_NOT_USED); #endif Py_INCREF(&MMH3Hasher32Type); if (PyModule_AddObject(module, "mmh3_32", (PyObject *)&MMH3Hasher32Type) < 0) { Py_DECREF(&MMH3Hasher32Type); Py_DECREF(module); return NULL; } Py_INCREF(&MMH3Hasher128x64Type); if (PyModule_AddObject(module, "mmh3_x64_128", (PyObject *)&MMH3Hasher128x64Type) < 0) { Py_DECREF(&MMH3Hasher128x64Type); Py_DECREF(module); return NULL; } Py_INCREF(&MMH3Hasher128x86Type); if (PyModule_AddObject(module, "mmh3_x86_128", (PyObject *)&MMH3Hasher128x86Type) < 0) { Py_DECREF(&MMH3Hasher128x86Type); Py_DECREF(module); return NULL; } return module; } ================================================ FILE: src/mmh3/murmurhash3.c ================================================ /*** * This file is under MIT Hajime Senuma, just like other files. * See LICENSE for details. * * It was originally written by Austin Appleby in C++ under the public domain, * but ported to PEP 7 C for Python 3.6 and later by the mmh3 project. * * Any issues should be reported to https://github.com/hajimes/mmh3/issues. * * The following is the original public domain notice by Austin Appleby. */ //----------------------------------------------------------------------------- // MurmurHash3 was written by Austin Appleby, and is placed in the public // domain. The author hereby disclaims copyright to this source code. 
// Note - The x86 and x64 versions do _not_ produce the same results, as the // algorithms are optimized for their respective platforms. You can still // compile and run any of them on any platform, but your performance with the // non-native version will be less than optimal. #include "murmurhash3.h" //----------------------------------------------------------------------------- void murmurhash3_x86_32(const void *key, Py_ssize_t len, uint32_t seed, void *out) { const uint8_t *data = (const uint8_t *)key; const Py_ssize_t nblocks = len / 4; uint32_t h1 = seed; const uint32_t c1 = 0xcc9e2d51; const uint32_t c2 = 0x1b873593; //---------- // body const uint32_t *blocks = (const uint32_t *)(data + nblocks * 4); for (Py_ssize_t i = -nblocks; i; i++) { uint32_t k1 = getblock32(blocks, i); k1 *= c1; k1 = ROTL32(k1, 15); k1 *= c2; h1 ^= k1; h1 = ROTL32(h1, 13); h1 = h1 * 5 + 0xe6546b64; } //---------- // tail const uint8_t *tail = (const uint8_t *)(data + nblocks * 4); uint32_t k1 = 0; switch (len & 3) { case 3: k1 ^= tail[2] << 16; case 2: k1 ^= tail[1] << 8; case 1: k1 ^= tail[0]; k1 *= c1; k1 = ROTL32(k1, 15); k1 *= c2; h1 ^= k1; }; //---------- // finalization h1 ^= len; h1 = fmix32(h1); *(uint32_t *)out = h1; } //----------------------------------------------------------------------------- void murmurhash3_x86_128(const void *key, const Py_ssize_t len, uint32_t seed, void *out) { const uint8_t *data = (const uint8_t *)key; const Py_ssize_t nblocks = len / 16; uint32_t h1 = seed; uint32_t h2 = seed; uint32_t h3 = seed; uint32_t h4 = seed; const uint32_t c1 = 0x239b961b; const uint32_t c2 = 0xab0e9789; const uint32_t c3 = 0x38b34ae5; const uint32_t c4 = 0xa1e38b93; //---------- // body const uint32_t *blocks = (const uint32_t *)(data + nblocks * 16); for (Py_ssize_t i = -nblocks; i; i++) { uint32_t k1 = getblock32(blocks, i * 4 + 0); uint32_t k2 = getblock32(blocks, i * 4 + 1); uint32_t k3 = getblock32(blocks, i * 4 + 2); uint32_t k4 = getblock32(blocks, i * 4 + 3); k1 
*= c1; k1 = ROTL32(k1, 15); k1 *= c2; h1 ^= k1; h1 = ROTL32(h1, 19); h1 += h2; h1 = h1 * 5 + 0x561ccd1b; k2 *= c2; k2 = ROTL32(k2, 16); k2 *= c3; h2 ^= k2; h2 = ROTL32(h2, 17); h2 += h3; h2 = h2 * 5 + 0x0bcaa747; k3 *= c3; k3 = ROTL32(k3, 17); k3 *= c4; h3 ^= k3; h3 = ROTL32(h3, 15); h3 += h4; h3 = h3 * 5 + 0x96cd1c35; k4 *= c4; k4 = ROTL32(k4, 18); k4 *= c1; h4 ^= k4; h4 = ROTL32(h4, 13); h4 += h1; h4 = h4 * 5 + 0x32ac3b17; } //---------- // tail const uint8_t *tail = (const uint8_t *)(data + nblocks * 16); uint32_t k1 = 0; uint32_t k2 = 0; uint32_t k3 = 0; uint32_t k4 = 0; switch (len & 15) { case 15: k4 ^= tail[14] << 16; case 14: k4 ^= tail[13] << 8; case 13: k4 ^= tail[12] << 0; k4 *= c4; k4 = ROTL32(k4, 18); k4 *= c1; h4 ^= k4; case 12: k3 ^= tail[11] << 24; case 11: k3 ^= tail[10] << 16; case 10: k3 ^= tail[9] << 8; case 9: k3 ^= tail[8] << 0; k3 *= c3; k3 = ROTL32(k3, 17); k3 *= c4; h3 ^= k3; case 8: k2 ^= tail[7] << 24; case 7: k2 ^= tail[6] << 16; case 6: k2 ^= tail[5] << 8; case 5: k2 ^= tail[4] << 0; k2 *= c2; k2 = ROTL32(k2, 16); k2 *= c3; h2 ^= k2; case 4: k1 ^= tail[3] << 24; case 3: k1 ^= tail[2] << 16; case 2: k1 ^= tail[1] << 8; case 1: k1 ^= tail[0] << 0; k1 *= c1; k1 = ROTL32(k1, 15); k1 *= c2; h1 ^= k1; }; //---------- // finalization h1 ^= len; h2 ^= len; h3 ^= len; h4 ^= len; h1 += h2; h1 += h3; h1 += h4; h2 += h1; h3 += h1; h4 += h1; h1 = fmix32(h1); h2 = fmix32(h2); h3 = fmix32(h3); h4 = fmix32(h4); h1 += h2; h1 += h3; h1 += h4; h2 += h1; h3 += h1; h4 += h1; #if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) ((uint32_t *)out)[0] = h2; ((uint32_t *)out)[1] = h1; ((uint32_t *)out)[2] = h4; ((uint32_t *)out)[3] = h3; #else ((uint32_t *)out)[0] = h1; ((uint32_t *)out)[1] = h2; ((uint32_t *)out)[2] = h3; ((uint32_t *)out)[3] = h4; #endif } //----------------------------------------------------------------------------- void murmurhash3_x64_128(const void *key, const Py_ssize_t len, const uint32_t seed, void *out) { const 
uint8_t *data = (const uint8_t *)key; const Py_ssize_t nblocks = len / 16; uint64_t h1 = seed; uint64_t h2 = seed; const uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5); const uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f); //---------- // body const uint64_t *blocks = (const uint64_t *)(data); for (Py_ssize_t i = 0; i < nblocks; i++) { uint64_t k1 = getblock64(blocks, i * 2 + 0); uint64_t k2 = getblock64(blocks, i * 2 + 1); k1 *= c1; k1 = ROTL64(k1, 31); k1 *= c2; h1 ^= k1; h1 = ROTL64(h1, 27); h1 += h2; h1 = h1 * 5 + 0x52dce729; k2 *= c2; k2 = ROTL64(k2, 33); k2 *= c1; h2 ^= k2; h2 = ROTL64(h2, 31); h2 += h1; h2 = h2 * 5 + 0x38495ab5; } //---------- // tail const uint8_t *tail = (const uint8_t *)(data + nblocks * 16); uint64_t k1 = 0; uint64_t k2 = 0; switch (len & 15) { case 15: k2 ^= ((uint64_t)tail[14]) << 48; case 14: k2 ^= ((uint64_t)tail[13]) << 40; case 13: k2 ^= ((uint64_t)tail[12]) << 32; case 12: k2 ^= ((uint64_t)tail[11]) << 24; case 11: k2 ^= ((uint64_t)tail[10]) << 16; case 10: k2 ^= ((uint64_t)tail[9]) << 8; case 9: k2 ^= ((uint64_t)tail[8]) << 0; k2 *= c2; k2 = ROTL64(k2, 33); k2 *= c1; h2 ^= k2; case 8: k1 ^= ((uint64_t)tail[7]) << 56; case 7: k1 ^= ((uint64_t)tail[6]) << 48; case 6: k1 ^= ((uint64_t)tail[5]) << 40; case 5: k1 ^= ((uint64_t)tail[4]) << 32; case 4: k1 ^= ((uint64_t)tail[3]) << 24; case 3: k1 ^= ((uint64_t)tail[2]) << 16; case 2: k1 ^= ((uint64_t)tail[1]) << 8; case 1: k1 ^= ((uint64_t)tail[0]) << 0; k1 *= c1; k1 = ROTL64(k1, 31); k1 *= c2; h1 ^= k1; }; //---------- // finalization h1 ^= len; h2 ^= len; h1 += h2; h2 += h1; h1 = fmix64(h1); h2 = fmix64(h2); h1 += h2; h2 += h1; ((uint64_t *)out)[0] = h1; ((uint64_t *)out)[1] = h2; } //----------------------------------------------------------------------------- ================================================ FILE: src/mmh3/murmurhash3.h ================================================ /*** * This file is under MIT Hajime Senuma, just like other files. * See LICENSE for details. 
* * It was originally written by Austin Appleby in C++ under the public domain, * but ported to PEP 7 C for Python 3.6 and later by the mmh3 project. * * Any issues should be reported to https://github.com/hajimes/mmh3/issues. * * The following is the original public domain notice by Austin Appleby. */ //----------------------------------------------------------------------------- // MurmurHash3 was written by Austin Appleby, and is placed in the public // domain. The author hereby disclaims copyright to this source code. #ifndef _MURMURHASH3_H_ #define _MURMURHASH3_H_ // To handle 64-bit data; see https://docs.python.org/3/c-api/arg.html #ifndef PY_SSIZE_T_CLEAN #define PY_SSIZE_T_CLEAN #endif #include #if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) #include #endif //----------------------------------------------------------------------------- // Platform-specific functions and macros // Microsoft Visual Studio #if defined(_MSC_VER) && (_MSC_VER < 1600) typedef signed __int8 int8_t; typedef signed __int32 int32_t; typedef signed __int64 int64_t; typedef unsigned __int8 uint8_t; typedef unsigned __int32 uint32_t; typedef unsigned __int64 uint64_t; // Other compilers #else // defined(_MSC_VER) #include #endif // !defined(_MSC_VER) //----------------------------------------------------------------------------- // Platform-specific functions and macros // Microsoft Visual Studio #if defined(_MSC_VER) #define FORCE_INLINE __forceinline #include #define ROTL32(x, y) _rotl(x, y) #define ROTL64(x, y) _rotl64(x, y) #define BIG_CONSTANT(x) (x) // Other compilers #else // defined(_MSC_VER) #if ((__GNUC__ > 4) || (__GNUC__ == 4 && GNUC_MINOR >= 4)) /* gcc version >= 4.4 4.1 = RHEL 5, 4.4 = RHEL 6. 
Don't inline for RHEL 5 gcc * which is 4.1*/ #define FORCE_INLINE inline __attribute__((always_inline)) #else #define FORCE_INLINE #endif static FORCE_INLINE uint32_t rotl32(uint32_t x, int8_t r) { return (x << r) | (x >> (32 - r)); } static FORCE_INLINE uint64_t rotl64(uint64_t x, int8_t r) { return (x << r) | (x >> (64 - r)); } #define ROTL32(x, y) rotl32(x, y) #define ROTL64(x, y) rotl64(x, y) #define BIG_CONSTANT(x) (x##LLU) #endif // !defined(_MSC_VER) //----------------------------------------------------------------------------- // Block read - if your platform needs to do endian-swapping or can only // handle aligned reads, do the conversion here static FORCE_INLINE uint32_t getblock32(const uint32_t *p, Py_ssize_t i) { #if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) return bswap_32(p[i]); #else return p[i]; #endif } static FORCE_INLINE uint64_t getblock64(const uint64_t *p, Py_ssize_t i) { #if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) return bswap_64(p[i]); #else return p[i]; #endif } //----------------------------------------------------------------------------- // Building blocks for multiply and rotate (MUR) operations. 
// Names are taken from Google Guava's implementation static FORCE_INLINE uint32_t mixK1(uint32_t k1) { const uint32_t c1 = 0xcc9e2d51; const uint32_t c2 = 0x1b873593; k1 *= c1; k1 = ROTL32(k1, 15); k1 *= c2; return k1; } static FORCE_INLINE uint32_t mixH1(uint32_t h1, const uint32_t h2, const uint8_t shift, const uint32_t c1) { h1 = ROTL32(h1, shift); h1 += h2; h1 = h1 * 5 + c1; return h1; } static FORCE_INLINE uint64_t mixK_x64_128(uint64_t k1, const uint8_t shift, const uint64_t c1, const uint64_t c2) { k1 *= c1; k1 = ROTL64(k1, shift); k1 *= c2; return k1; } static FORCE_INLINE uint64_t mixK1_x64_128(uint64_t k1) { const uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5); const uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f); k1 *= c1; k1 = ROTL64(k1, 31); k1 *= c2; return k1; } static FORCE_INLINE uint64_t mixK2_x64_128(uint64_t k2) { const uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5); const uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f); k2 *= c2; k2 = ROTL64(k2, 33); k2 *= c1; return k2; } static FORCE_INLINE uint64_t mixH_x64_128(uint64_t h1, uint64_t h2, const uint8_t shift, const uint32_t c) { h1 = ROTL64(h1, shift); h1 += h2; h1 = h1 * 5 + c; return h1; } static FORCE_INLINE uint64_t mixK_x86_128(uint32_t k, const uint8_t shift, const uint32_t c1, const uint32_t c2) { k *= c1; k = ROTL32(k, shift); k *= c2; return k; } //----------------------------------------------------------------------------- // Finalization mix - force all bits of a hash block to avalanche static FORCE_INLINE uint32_t fmix32(uint32_t h) { h ^= h >> 16; h *= 0x85ebca6b; h ^= h >> 13; h *= 0xc2b2ae35; h ^= h >> 16; return h; } //---------- static FORCE_INLINE uint64_t fmix64(uint64_t k) { k ^= k >> 33; k *= BIG_CONSTANT(0xff51afd7ed558ccd); k ^= k >> 33; k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53); k ^= k >> 33; return k; } //----------------------------------------------------------------------------- // Finalization function static FORCE_INLINE void digest_x64_128_impl(uint64_t h1, uint64_t 
h2, const uint64_t k1, const uint64_t k2, const Py_ssize_t len, const char *out) { h1 ^= mixK1_x64_128(k1); h2 ^= mixK2_x64_128(k2); h1 ^= len; h2 ^= len; h1 += h2; h2 += h1; h1 = fmix64(h1); h2 = fmix64(h2); h1 += h2; h2 += h1; #if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) ((uint64_t *)out)[0] = bswap_64(h1); ((uint64_t *)out)[1] = bswap_64(h2); #else ((uint64_t *)out)[0] = h1; ((uint64_t *)out)[1] = h2; #endif } static FORCE_INLINE void digest_x86_128_impl(uint32_t h1, uint32_t h2, uint32_t h3, uint32_t h4, const uint32_t k1, const uint32_t k2, const uint32_t k3, const uint32_t k4, const Py_ssize_t len, const char *out) { const uint32_t c1 = 0x239b961b; const uint32_t c2 = 0xab0e9789; const uint32_t c3 = 0x38b34ae5; const uint32_t c4 = 0xa1e38b93; h1 ^= mixK_x86_128(k1, 15, c1, c2); h2 ^= mixK_x86_128(k2, 16, c2, c3); h3 ^= mixK_x86_128(k3, 17, c3, c4); h4 ^= mixK_x86_128(k4, 18, c4, c1); h1 ^= len; h2 ^= len; h3 ^= len; h4 ^= len; h1 += h2; h1 += h3; h1 += h4; h2 += h1; h3 += h1; h4 += h1; h1 = fmix32(h1); h2 = fmix32(h2); h3 = fmix32(h3); h4 = fmix32(h4); h1 += h2; h1 += h3; h1 += h4; h2 += h1; h3 += h1; h4 += h1; #if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) ((uint32_t *)out)[0] = bswap_32(h1); ((uint32_t *)out)[1] = bswap_32(h2); ((uint32_t *)out)[2] = bswap_32(h3); ((uint32_t *)out)[3] = bswap_32(h4); #else ((uint32_t *)out)[0] = h1; ((uint32_t *)out)[1] = h2; ((uint32_t *)out)[2] = h3; ((uint32_t *)out)[3] = h4; #endif } //----------------------------------------------------------------------------- void murmurhash3_x86_32(const void *key, Py_ssize_t len, uint32_t seed, void *out); void murmurhash3_x86_128(const void *key, Py_ssize_t len, uint32_t seed, void *out); void murmurhash3_x64_128(const void *key, Py_ssize_t len, uint32_t seed, void *out); //----------------------------------------------------------------------------- #endif // _MURMURHASH3_H_ ================================================ FILE: 
src/mmh3/py.typed
================================================

================================================
FILE: tests/helper.py
================================================
"""Helper functions for tests."""


# see also https://stackoverflow.com/a/1375939
def u32_to_s32(v: int) -> int:
    """Convert unsigned 32-bit integer to signed 32-bit integer.

    Args:
        v: Unsigned 32-bit integer.

    Returns:
        Signed 32-bit representation of the input.
    """
    # Two's-complement reinterpretation: if the sign bit is set, shift the
    # value down by 2**32.
    if v & 0x80000000:
        return -0x100000000 + v
    return v


================================================
FILE: tests/test_doctrings.py
================================================
# pylint: disable=missing-module-docstring,missing-function-docstring
import mmh3


def test_function_docstrings() -> None:
    # Each public function must expose a docstring beginning with its
    # signature line (checked via startswith on the first paragraph).
    assert "__doc__" in dir(mmh3.hash)
    assert mmh3.hash.__doc__ is not None
    assert mmh3.hash.__doc__.startswith("hash(key, seed=0, signed=True) -> int\n\n")
    assert "__doc__" in dir(mmh3.hash_from_buffer)
    assert mmh3.hash_from_buffer.__doc__ is not None
    assert mmh3.hash_from_buffer.__doc__.startswith(
        "hash_from_buffer(key, seed=0, signed=True) -> int\n\n"
    )
    assert "__doc__" in dir(mmh3.hash64)
    assert mmh3.hash64.__doc__ is not None
    assert mmh3.hash64.__doc__.startswith(
        "hash64(key, seed=0, x64arch=True, signed=True) -> tuple[int, int]\n\n"
    )
    assert "__doc__" in dir(mmh3.hash128)
    assert mmh3.hash128.__doc__ is not None
    assert mmh3.hash128.__doc__.startswith(
        "hash128(key, seed=0, x64arch=True, signed=False) -> int\n\n"
    )
    assert "__doc__" in dir(mmh3.hash_bytes)
    assert mmh3.hash_bytes.__doc__ is not None
    assert mmh3.hash_bytes.__doc__.startswith(
        "hash_bytes(key, seed=0, x64arch=True) -> bytes\n\n"
    )


def test_module_docstring() -> None:
    assert "__doc__" in dir(mmh3)
    assert mmh3.__doc__ is not None
    assert mmh3.__doc__.startswith("A Python front-end to MurmurHash3")


================================================
FILE: tests/test_free_threading.py
================================================
# pylint: disable=missing-module-docstring,missing-function-docstring
from collections.abc import Callable
from concurrent.futures import ThreadPoolExecutor
from typing import Any

import mmh3


def run_threaded(func: Callable[..., Any], num_threads: int = 8) -> None:
    # Run func concurrently on num_threads threads; future.result() re-raises
    # any exception raised inside a worker.
    with ThreadPoolExecutor(max_workers=num_threads) as executor:
        futures = [executor.submit(func) for _ in range(num_threads)]
        for future in futures:
            future.result()  # wait for all threads to complete


def test_parallel_hasher_mmh3_32_update() -> None:
    # A shared hasher updated from 8 threads (1000 updates each) must end in
    # the same state as hashing the concatenated input in one shot.
    hasher = mmh3.mmh3_32()

    def closure() -> None:
        for _ in range(1000):
            hasher.update(b"foo")

    run_threaded(closure, num_threads=8)
    assert hasher.sintdigest() == mmh3.hash(b"foo" * 8000)


def test_parallel_hasher_mmh3_x64_128_update() -> None:
    hasher = mmh3.mmh3_x64_128()

    def closure() -> None:
        for _ in range(1000):
            hasher.update(b"foo")

    run_threaded(closure, num_threads=8)
    assert hasher.sintdigest() == mmh3.hash128(b"foo" * 8000, x64arch=True, signed=True)


def test_parallel_hasher_mmh3_x86_128_update() -> None:
    hasher = mmh3.mmh3_x86_128()

    def closure() -> None:
        for _ in range(1000):
            hasher.update(b"foo")

    run_threaded(closure, num_threads=8)
    assert hasher.sintdigest() == mmh3.hash128(
        b"foo" * 8000, x64arch=False, signed=True
    )


================================================
FILE: tests/test_invalid_inputs.py
================================================
# pylint: disable=missing-module-docstring, missing-function-docstring
# pylint: disable=no-value-for-parameter, too-many-function-args
from typing import no_type_check

import pytest

import mmh3


# Common pattern throughout this file: each function raises TypeError for
# missing/extra/mis-typed arguments, and ValueError for seeds outside the
# unsigned 32-bit range [0, 2**32 - 1].
@no_type_check
def test_hash_raises_typeerror() -> None:
    with pytest.raises(TypeError):
        mmh3.hash()
    with pytest.raises(TypeError):
        mmh3.hash(b"hello, world", "42")
    with pytest.raises(TypeError):
        mmh3.hash(b"hello, world", 42, True, 1234)
    with pytest.raises(TypeError):
        mmh3.hash(b"hello, world", seed="42")
    with pytest.raises(TypeError):
        mmh3.hash([1, 2, 3], 42)
    # pylint: disable=redundant-keyword-arg
    with pytest.raises(TypeError):
        mmh3.hash(b"hello, world", key=b"42")


@no_type_check
def test_hash_raises_valueerror() -> None:
    with pytest.raises(ValueError):
        mmh3.hash(b"hello, world", -1)
    with pytest.raises(ValueError):
        mmh3.hash(b"hello, world", 2**32)


@no_type_check
def test_hash128_raises_typeerror() -> None:
    with pytest.raises(TypeError):
        mmh3.hash128()
    with pytest.raises(TypeError):
        mmh3.hash128(b"hello, world", "42")
    with pytest.raises(TypeError):
        mmh3.hash128(b"hello, world", 42, True, False, 1234)
    with pytest.raises(TypeError):
        mmh3.hash128(b"hello, world", seed="42")
    with pytest.raises(TypeError):
        mmh3.hash128([1, 2, 3], 42)


@no_type_check
def test_hash128_raises_valueerror() -> None:
    with pytest.raises(ValueError):
        mmh3.hash128(b"hello, world", -1)
    with pytest.raises(ValueError):
        mmh3.hash128(b"hello, world", 2**32)


@no_type_check
def test_hash64_raises_typeerror() -> None:
    with pytest.raises(TypeError):
        mmh3.hash64()
    with pytest.raises(TypeError):
        mmh3.hash64(b"hello, world", "42")
    with pytest.raises(TypeError):
        mmh3.hash64(b"hello, world", 42, True, False, 1234)
    with pytest.raises(TypeError):
        mmh3.hash64(b"hello, world", seed="42")
    with pytest.raises(TypeError):
        mmh3.hash64([1, 2, 3], 42)


@no_type_check
def test_hash64_raises_valueerror() -> None:
    with pytest.raises(ValueError):
        mmh3.hash64(b"hello, world", -1)
    with pytest.raises(ValueError):
        mmh3.hash64(b"hello, world", 2**32)


@no_type_check
def test_hash_bytes_raises_typeerror() -> None:
    with pytest.raises(TypeError):
        mmh3.hash_bytes()
    with pytest.raises(TypeError):
        mmh3.hash_bytes(b"hello, world", "42")
    with pytest.raises(TypeError):
        mmh3.hash_bytes(b"hello, world", 42, True, 1234)
    with pytest.raises(TypeError):
        mmh3.hash_bytes(b"hello, world", seed="42")
    with pytest.raises(TypeError):
        mmh3.hash_bytes([1, 2, 3], 42)


@no_type_check
def test_hash_bytes_raises_valueerror() -> None:
    with pytest.raises(ValueError):
        mmh3.hash_bytes(b"hello, world", -1)
    with pytest.raises(ValueError):
        mmh3.hash_bytes(b"hello, world", 2**32)


@no_type_check
def test_hash_from_buffer_raises_typeerror() -> None:
    with pytest.raises(TypeError):
        mmh3.hash_from_buffer()
    with pytest.raises(TypeError):
        mmh3.hash_from_buffer(b"hello, world", "42")
    with pytest.raises(TypeError):
        mmh3.hash_from_buffer(b"hello, world", 42, True, 1234)
    with pytest.raises(TypeError):
        mmh3.hash_from_buffer(b"hello, world", seed="42")
    with pytest.raises(TypeError):
        mmh3.hash_from_buffer([1, 2, 3], 42)


@no_type_check
def test_hash_from_buffer_raises_valueerror() -> None:
    with pytest.raises(ValueError):
        mmh3.hash_from_buffer(b"hello, world", -1)
    with pytest.raises(ValueError):
        mmh3.hash_from_buffer(b"hello, world", 2**32)


# The mmh3_* one-shot digest functions additionally reject str keys (they
# accept buffer objects only).
@no_type_check
def test_mmh3_32_digest_raises_typeerror() -> None:
    with pytest.raises(TypeError):
        mmh3.mmh3_32_digest()
    with pytest.raises(TypeError):
        mmh3.mmh3_32_digest(b"hello, world", 42, 1234)
    with pytest.raises(TypeError):
        mmh3.mmh3_32_digest("hello, world")
    with pytest.raises(TypeError):
        mmh3.mmh3_32_digest(b"hello, world", "42")
    with pytest.raises(TypeError):
        mmh3.mmh3_32_digest([1, 2, 3], 42)


@no_type_check
def test_mmh3_32_digest_raises_valueerror() -> None:
    with pytest.raises(ValueError):
        mmh3.mmh3_32_digest(b"hello, world", -1)
    with pytest.raises(ValueError):
        mmh3.mmh3_32_digest(b"hello, world", 2**32)


@no_type_check
def test_mmh3_32_sintdigest_raises_typeerror() -> None:
    with pytest.raises(TypeError):
        mmh3.mmh3_32_sintdigest()
    with pytest.raises(TypeError):
        mmh3.mmh3_32_sintdigest(b"hello, world", 42, 1234)
    with pytest.raises(TypeError):
        mmh3.mmh3_32_sintdigest("hello, world")
    with pytest.raises(TypeError):
        mmh3.mmh3_32_sintdigest(b"hello, world", "42")
    with pytest.raises(TypeError):
        mmh3.mmh3_32_sintdigest([1, 2, 3], 42)


@no_type_check
def test_mmh3_32_sintdigest_raises_valueerror() -> None:
    with pytest.raises(ValueError):
        mmh3.mmh3_32_sintdigest(b"hello, world", -1)
    with pytest.raises(ValueError):
        mmh3.mmh3_32_sintdigest(b"hello, world", 2**32)


@no_type_check
def test_mmh3_32_uintdigest_raises_typeerror() -> None:
    with pytest.raises(TypeError):
        mmh3.mmh3_32_uintdigest()
    with pytest.raises(TypeError):
        mmh3.mmh3_32_uintdigest(b"hello, world", 42, 1234)
    with pytest.raises(TypeError):
        mmh3.mmh3_32_uintdigest("hello, world")
    with pytest.raises(TypeError):
        mmh3.mmh3_32_uintdigest(b"hello, world", "42")
    with pytest.raises(TypeError):
        mmh3.mmh3_32_uintdigest([1, 2, 3], 42)


@no_type_check
def test_mmh3_32_uintdigest_raises_valueerror() -> None:
    with pytest.raises(ValueError):
        mmh3.mmh3_32_uintdigest(b"hello, world", -1)
    with pytest.raises(ValueError):
        mmh3.mmh3_32_uintdigest(b"hello, world", 2**32)


@no_type_check
def test_mmh3_x64_128_digest_raises_typeerror() -> None:
    with pytest.raises(TypeError):
        mmh3.mmh3_x64_128_digest()
    with pytest.raises(TypeError):
        mmh3.mmh3_x64_128_digest(b"hello, world", 42, 1234)
    with pytest.raises(TypeError):
        mmh3.mmh3_x64_128_digest("hello, world")
    with pytest.raises(TypeError):
        mmh3.mmh3_x64_128_digest(b"hello, world", "42")
    with pytest.raises(TypeError):
        mmh3.mmh3_x64_128_digest([1, 2, 3], 42)


@no_type_check
def test_mmh3_x64_128_digest_raises_valueerror() -> None:
    with pytest.raises(ValueError):
        mmh3.mmh3_x64_128_digest(b"hello, world", -1)
    with pytest.raises(ValueError):
        mmh3.mmh3_x64_128_digest(b"hello, world", 2**32)


@no_type_check
def test_mmh3_x64_128_sintdigest_raises_typeerror() -> None:
    with pytest.raises(TypeError):
        mmh3.mmh3_x64_128_sintdigest()
    with pytest.raises(TypeError):
        mmh3.mmh3_x64_128_sintdigest(b"hello, world", 42, 1234)
    with pytest.raises(TypeError):
        mmh3.mmh3_x64_128_sintdigest("hello, world")
    with pytest.raises(TypeError):
        mmh3.mmh3_x64_128_sintdigest(b"hello, world", "42")
    with pytest.raises(TypeError):
        mmh3.mmh3_x64_128_sintdigest([1, 2, 3], 42)


@no_type_check
def test_mmh3_x64_128_sintdigest_raises_valueerror() -> None:
    with pytest.raises(ValueError):
        mmh3.mmh3_x64_128_sintdigest(b"hello, world", -1)
    with pytest.raises(ValueError):
        mmh3.mmh3_x64_128_sintdigest(b"hello, world", 2**32)


@no_type_check
def test_mmh3_x64_128_uintdigest_raises_typeerror() -> None:
    with pytest.raises(TypeError):
        mmh3.mmh3_x64_128_uintdigest()
    with pytest.raises(TypeError):
        mmh3.mmh3_x64_128_uintdigest(b"hello, world", 42, 1234)
    with pytest.raises(TypeError):
        mmh3.mmh3_x64_128_uintdigest("hello, world")
    with pytest.raises(TypeError):
        mmh3.mmh3_x64_128_uintdigest(b"hello, world", "42")
    with pytest.raises(TypeError):
        mmh3.mmh3_x64_128_uintdigest([1, 2, 3], 42)


@no_type_check
def test_mmh3_x64_128_uintdigest_raises_valueerror() -> None:
    with pytest.raises(ValueError):
        mmh3.mmh3_x64_128_uintdigest(b"hello, world", -1)
    with pytest.raises(ValueError):
        mmh3.mmh3_x64_128_uintdigest(b"hello, world", 2**32)


@no_type_check
def test_mmh3_x64_128_stupledigest_raises_typeerror() -> None:
    with pytest.raises(TypeError):
        mmh3.mmh3_x64_128_stupledigest()
    with pytest.raises(TypeError):
        mmh3.mmh3_x64_128_stupledigest(b"hello, world", 42, 1234)
    with pytest.raises(TypeError):
        mmh3.mmh3_x64_128_stupledigest("hello, world")
    with pytest.raises(TypeError):
        mmh3.mmh3_x64_128_stupledigest(b"hello, world", "42")
    with pytest.raises(TypeError):
        mmh3.mmh3_x64_128_stupledigest([1, 2, 3], 42)


@no_type_check
def test_mmh3_x64_128_stupledigest_raises_valueerror() -> None:
    with pytest.raises(ValueError):
        mmh3.mmh3_x64_128_stupledigest(b"hello, world", -1)
    with pytest.raises(ValueError):
        mmh3.mmh3_x64_128_stupledigest(b"hello, world", 2**32)


@no_type_check
def test_mmh3_x64_128_utupledigest_raises_typeerror() -> None:
    with pytest.raises(TypeError):
        mmh3.mmh3_x64_128_utupledigest()
    with pytest.raises(TypeError):
        mmh3.mmh3_x64_128_utupledigest(b"hello, world", 42, 1234)
    with pytest.raises(TypeError):
        mmh3.mmh3_x64_128_utupledigest("hello, world")
    with pytest.raises(TypeError):
        mmh3.mmh3_x64_128_utupledigest(b"hello, world", "42")
    with pytest.raises(TypeError):
        mmh3.mmh3_x64_128_utupledigest([1, 2, 3], 42)


@no_type_check
def test_mmh3_x64_128_utupledigest_raises_valueerror() -> None:
    with pytest.raises(ValueError):
        mmh3.mmh3_x64_128_utupledigest(b"hello, world", -1)
    with pytest.raises(ValueError):
        mmh3.mmh3_x64_128_utupledigest(b"hello, world", 2**32)


@no_type_check
def test_mmh3_x86_128_digest_raises_typeerror() -> None:
    with pytest.raises(TypeError):
        mmh3.mmh3_x86_128_digest()
    with pytest.raises(TypeError):
        mmh3.mmh3_x86_128_digest(b"hello, world", 42, 1234)
    with pytest.raises(TypeError):
        mmh3.mmh3_x86_128_digest("hello, world")
    with pytest.raises(TypeError):
        mmh3.mmh3_x86_128_digest(b"hello, world", "42")
    with pytest.raises(TypeError):
        mmh3.mmh3_x86_128_digest([1, 2, 3], 42)


@no_type_check
def test_mmh3_x86_128_digest_raises_valueerror() -> None:
    with pytest.raises(ValueError):
        mmh3.mmh3_x86_128_digest(b"hello, world", -1)
    with pytest.raises(ValueError):
        mmh3.mmh3_x86_128_digest(b"hello, world", 2**32)


@no_type_check
def test_mmh3_x86_128_sintdigest_raises_typeerror() -> None:
    with pytest.raises(TypeError):
        mmh3.mmh3_x86_128_sintdigest()
    with pytest.raises(TypeError):
        mmh3.mmh3_x86_128_sintdigest(b"hello, world", 42, 1234)
    with pytest.raises(TypeError):
        mmh3.mmh3_x86_128_sintdigest("hello, world")
    with pytest.raises(TypeError):
        mmh3.mmh3_x86_128_sintdigest(b"hello, world", "42")
    with pytest.raises(TypeError):
        mmh3.mmh3_x86_128_sintdigest([1, 2, 3], 42)


@no_type_check
def test_mmh3_x86_128_sintdigest_raises_valueerror() -> None:
    with pytest.raises(ValueError):
        mmh3.mmh3_x86_128_sintdigest(b"hello, world", -1)
    with pytest.raises(ValueError):
        mmh3.mmh3_x86_128_sintdigest(b"hello, world", 2**32)


@no_type_check
def test_mmh3_x86_128_uintdigest_raises_typeerror() -> None:
    with pytest.raises(TypeError):
        mmh3.mmh3_x86_128_uintdigest()
    with pytest.raises(TypeError):
        mmh3.mmh3_x86_128_uintdigest(b"hello, world", 42, 1234)
    with pytest.raises(TypeError):
        mmh3.mmh3_x86_128_uintdigest("hello, world")
    with pytest.raises(TypeError):
        mmh3.mmh3_x86_128_uintdigest(b"hello, world", "42")
    with pytest.raises(TypeError):
        mmh3.mmh3_x86_128_uintdigest([1, 2, 3], 42)
@no_type_check
def test_mmh3_x86_128_uintdigest_raises_valueerror() -> None:
    with pytest.raises(ValueError):
        mmh3.mmh3_x86_128_uintdigest(b"hello, world", -1)
    with pytest.raises(ValueError):
        mmh3.mmh3_x86_128_uintdigest(b"hello, world", 2**32)


@no_type_check
def test_mmh3_x86_128_stupledigest_raises_typeerror() -> None:
    with pytest.raises(TypeError):
        mmh3.mmh3_x86_128_stupledigest()
    with pytest.raises(TypeError):
        mmh3.mmh3_x86_128_stupledigest(b"hello, world", 42, 1234)
    with pytest.raises(TypeError):
        mmh3.mmh3_x86_128_stupledigest("hello, world")
    with pytest.raises(TypeError):
        mmh3.mmh3_x86_128_stupledigest(b"hello, world", "42")
    with pytest.raises(TypeError):
        mmh3.mmh3_x86_128_stupledigest([1, 2, 3], 42)


@no_type_check
def test_mmh3_x86_128_stupledigest_raises_valueerror() -> None:
    with pytest.raises(ValueError):
        mmh3.mmh3_x86_128_stupledigest(b"hello, world", -1)
    with pytest.raises(ValueError):
        mmh3.mmh3_x86_128_stupledigest(b"hello, world", 2**32)


@no_type_check
def test_mmh3_x86_128_utupledigest_raises_typeerror() -> None:
    with pytest.raises(TypeError):
        mmh3.mmh3_x86_128_utupledigest()
    with pytest.raises(TypeError):
        mmh3.mmh3_x86_128_utupledigest(b"hello, world", 42, 1234)
    with pytest.raises(TypeError):
        mmh3.mmh3_x86_128_utupledigest("hello, world")
    with pytest.raises(TypeError):
        mmh3.mmh3_x86_128_utupledigest(b"hello, world", "42")
    with pytest.raises(TypeError):
        mmh3.mmh3_x86_128_utupledigest([1, 2, 3], 42)


@no_type_check
def test_mmh3_x86_128_utupledigest_raises_valueerror() -> None:
    with pytest.raises(ValueError):
        mmh3.mmh3_x86_128_utupledigest(b"hello, world", -1)
    with pytest.raises(ValueError):
        mmh3.mmh3_x86_128_utupledigest(b"hello, world", 2**32)


# Hasher-object constructors: same invalid-argument contract as the one-shot
# functions.
@no_type_check
def test_mmh3_32_init_raises_typeerror() -> None:
    with pytest.raises(TypeError):
        mmh3.mmh3_32(b"hello, world", 42, 1234)
    with pytest.raises(TypeError):
        mmh3.mmh3_32("hello, world")
    with pytest.raises(TypeError):
        mmh3.mmh3_32(b"hello, world", "42")
    with pytest.raises(TypeError):
        mmh3.mmh3_32([1, 2, 3], 42)


@no_type_check
def test_mmh3_32_init_raises_valueerror() -> None:
    with pytest.raises(ValueError):
        mmh3.mmh3_32(b"hello, world", -1)
    with pytest.raises(ValueError):
        mmh3.mmh3_32(b"hello, world", 2**32)


@no_type_check
def test_mmh3_x64_128_init_raises_typeerror() -> None:
    with pytest.raises(TypeError):
        mmh3.mmh3_x64_128(b"hello, world", 42, 1234)
    with pytest.raises(TypeError):
        mmh3.mmh3_x64_128("hello, world")
    with pytest.raises(TypeError):
        mmh3.mmh3_x64_128(b"hello, world", "42")
    with pytest.raises(TypeError):
        mmh3.mmh3_x64_128([1, 2, 3], 42)


@no_type_check
def test_mmh3_x64_128_init_raises_valueerror() -> None:
    with pytest.raises(ValueError):
        mmh3.mmh3_x64_128(b"hello, world", -1)
    with pytest.raises(ValueError):
        mmh3.mmh3_x64_128(b"hello, world", 2**32)


@no_type_check
def test_mmh3_x86_128_init_raises_typeerror() -> None:
    with pytest.raises(TypeError):
        mmh3.mmh3_x86_128(b"hello, world", 42, 1234)
    with pytest.raises(TypeError):
        mmh3.mmh3_x86_128("hello, world")
    with pytest.raises(TypeError):
        mmh3.mmh3_x86_128(b"hello, world", "42")
    with pytest.raises(TypeError):
        mmh3.mmh3_x86_128([1, 2, 3], 42)


@no_type_check
def test_mmh3_x86_128_init_raises_valueerror() -> None:
    with pytest.raises(ValueError):
        mmh3.mmh3_x86_128(b"hello, world", -1)
    with pytest.raises(ValueError):
        mmh3.mmh3_x86_128(b"hello, world", 2**32)


================================================
FILE: tests/test_mmh3.py
================================================
# pylint: disable=missing-module-docstring,missing-function-docstring
import sys

import mmh3
from helper import u32_to_s32


def test_hash() -> None:
    assert mmh3.hash("foo") == -156908512
    # Test vectors devised by Ian Boyd
    # https://stackoverflow.com/a/31929528
    assert mmh3.hash(b"", seed=0) == 0
    assert mmh3.hash(b"", seed=1) == 0x514E28B7
    assert mmh3.hash(b"", seed=0xFFFFFFFF) == u32_to_s32(0x81F16F39)
    assert mmh3.hash(b"\x21\x43\x65\x87", 0) == u32_to_s32(0xF55B516B)
    assert mmh3.hash(b"\x21\x43\x65\x87", 0x5082EDEE) == u32_to_s32(0x2362F9DE)
    assert mmh3.hash(b"\x21\x43\x65", 0) == u32_to_s32(0x7E4A8634)
    assert mmh3.hash(b"\x21\x43", 0) == u32_to_s32(0xA0F7B07A)
    assert mmh3.hash(b"\x21", 0) == u32_to_s32(0x72661CF4)
    assert mmh3.hash(b"\xff\xff\xff\xff", 0) == u32_to_s32(0x76293B50)
    assert mmh3.hash(b"\x00\x00\x00\x00", 0) == u32_to_s32(0x2362F9DE)
    assert mmh3.hash(b"\x00\x00\x00", 0) == u32_to_s32(0x85F0B427)
    assert mmh3.hash(b"\x00\x00", 0) == u32_to_s32(0x30F4C306)
    assert mmh3.hash(b"\x00", 0) == u32_to_s32(0x514E28B7)
    assert mmh3.hash("aaaa", 0x9747B28C) == u32_to_s32(0x5A97808A)
    assert mmh3.hash("aaa", 0x9747B28C) == u32_to_s32(0x283E0130)
    assert mmh3.hash("aa", 0x9747B28C) == u32_to_s32(0x5D211726)
    assert mmh3.hash("a", 0x9747B28C) == u32_to_s32(0x7FA09EA6)
    assert mmh3.hash("abcd", 0x9747B28C) == u32_to_s32(0xF0478627)
    assert mmh3.hash("abc", 0x9747B28C) == u32_to_s32(0xC84A62DD)
    assert mmh3.hash("ab", 0x9747B28C) == u32_to_s32(0x74875592)
    assert mmh3.hash("a", 0x9747B28C) == u32_to_s32(0x7FA09EA6)
    assert mmh3.hash("Hello, world!", 0x9747B28C) == u32_to_s32(0x24884CBA)
    assert mmh3.hash("ππππππππ".encode(), 0x9747B28C) == u32_to_s32(0xD58063C1)
    assert mmh3.hash("a" * 256, 0x9747B28C) == u32_to_s32(0x37405BDC)
    assert mmh3.hash("abc", 0) == u32_to_s32(0xB3DD93FA)
    assert mmh3.hash(
        "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq", 0
    ) == u32_to_s32(0xEE925B90)
    assert mmh3.hash(
        "The quick brown fox jumps over the lazy dog", 0x9747B28C
    ) == u32_to_s32(0x2FA826CD)


def test_hash_unsigned() -> None:
    assert mmh3.hash("foo", signed=False) == 4138058784
    # Test vectors devised by Ian Boyd
    # https://stackoverflow.com/a/31929528
    assert mmh3.hash(b"", seed=0, signed=False) == 0
    assert mmh3.hash(b"", seed=1, signed=False) == 0x514E28B7
    assert mmh3.hash(b"", seed=0xFFFFFFFF, signed=False) == 0x81F16F39
    assert mmh3.hash(b"\x21\x43\x65\x87", 0, signed=False) == 0xF55B516B
    assert mmh3.hash(b"\x21\x43\x65\x87", 0x5082EDEE, signed=False) == 0x2362F9DE
    assert mmh3.hash(b"\x21\x43\x65", 0, signed=False) == 0x7E4A8634
    assert mmh3.hash(b"\x21\x43", 0, signed=False) == 0xA0F7B07A
    assert mmh3.hash(b"\x21", 0, signed=False) == 0x72661CF4
    assert mmh3.hash(b"\xff\xff\xff\xff", 0, signed=False) == 0x76293B50
    assert mmh3.hash(b"\x00\x00\x00\x00", 0, signed=False) == 0x2362F9DE
    assert mmh3.hash(b"\x00\x00\x00", 0, signed=False) == 0x85F0B427
    assert mmh3.hash(b"\x00\x00", 0, signed=False) == 0x30F4C306
    assert mmh3.hash(b"\x00", 0, signed=False) == 0x514E28B7
    assert mmh3.hash("aaaa", 0x9747B28C, signed=False) == 0x5A97808A
    assert mmh3.hash("aaa", 0x9747B28C, signed=False) == 0x283E0130
    assert mmh3.hash("aa", 0x9747B28C, signed=False) == 0x5D211726
    assert mmh3.hash("a", 0x9747B28C, signed=False) == 0x7FA09EA6
    assert mmh3.hash("abcd", 0x9747B28C, signed=False) == 0xF0478627
    assert mmh3.hash("abc", 0x9747B28C, signed=False) == 0xC84A62DD
    assert mmh3.hash("ab", 0x9747B28C, signed=False) == 0x74875592
    assert mmh3.hash("a", 0x9747B28C, signed=False) == 0x7FA09EA6
    assert mmh3.hash("Hello, world!", 0x9747B28C, signed=False) == 0x24884CBA
    assert mmh3.hash("ππππππππ".encode(), 0x9747B28C, signed=False) == 0xD58063C1
    assert mmh3.hash("a" * 256, 0x9747B28C, signed=False) == 0x37405BDC
    assert mmh3.hash("abc", 0, signed=False) == 0xB3DD93FA
    assert (
        mmh3.hash(
            "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq", 0, signed=False
        )
        == 0xEE925B90
    )
    assert (
        mmh3.hash(
            "The quick brown fox jumps over the lazy dog", 0x9747B28C, signed=False
        )
        == 0x2FA826CD
    )
    # NOTE(review): the following assert is an exact duplicate of the one
    # above — candidate for removal.
    assert (
        mmh3.hash(
            "The quick brown fox jumps over the lazy dog", 0x9747B28C, signed=False
        )
        == 0x2FA826CD
    )


# NOTE(review): test_hash2 is an exact duplicate of test_hash — candidate for
# removal.
def test_hash2() -> None:
    assert mmh3.hash("foo") == -156908512
    # Test vectors devised by Ian Boyd
    # https://stackoverflow.com/a/31929528
    assert mmh3.hash(b"", seed=0) == 0
    assert mmh3.hash(b"", seed=1) == 0x514E28B7
    assert mmh3.hash(b"", seed=0xFFFFFFFF) == u32_to_s32(0x81F16F39)
    assert mmh3.hash(b"\x21\x43\x65\x87", 0) == u32_to_s32(0xF55B516B)
    assert mmh3.hash(b"\x21\x43\x65\x87", 0x5082EDEE) == u32_to_s32(0x2362F9DE)
    assert mmh3.hash(b"\x21\x43\x65", 0) == u32_to_s32(0x7E4A8634)
    assert mmh3.hash(b"\x21\x43", 0) == u32_to_s32(0xA0F7B07A)
    assert mmh3.hash(b"\x21", 0) == u32_to_s32(0x72661CF4)
    assert mmh3.hash(b"\xff\xff\xff\xff", 0) == u32_to_s32(0x76293B50)
    assert mmh3.hash(b"\x00\x00\x00\x00", 0) == u32_to_s32(0x2362F9DE)
    assert mmh3.hash(b"\x00\x00\x00", 0) == u32_to_s32(0x85F0B427)
    assert mmh3.hash(b"\x00\x00", 0) == u32_to_s32(0x30F4C306)
    assert mmh3.hash(b"\x00", 0) == u32_to_s32(0x514E28B7)
    assert mmh3.hash("aaaa", 0x9747B28C) == u32_to_s32(0x5A97808A)
    assert mmh3.hash("aaa", 0x9747B28C) == u32_to_s32(0x283E0130)
    assert mmh3.hash("aa", 0x9747B28C) == u32_to_s32(0x5D211726)
    assert mmh3.hash("a", 0x9747B28C) == u32_to_s32(0x7FA09EA6)
    assert mmh3.hash("abcd", 0x9747B28C) == u32_to_s32(0xF0478627)
    assert mmh3.hash("abc", 0x9747B28C) == u32_to_s32(0xC84A62DD)
    assert mmh3.hash("ab", 0x9747B28C) == u32_to_s32(0x74875592)
    assert mmh3.hash("a", 0x9747B28C) == u32_to_s32(0x7FA09EA6)
    assert mmh3.hash("Hello, world!", 0x9747B28C) == u32_to_s32(0x24884CBA)
    assert mmh3.hash("ππππππππ".encode(), 0x9747B28C) == u32_to_s32(0xD58063C1)
    assert mmh3.hash("a" * 256, 0x9747B28C) == u32_to_s32(0x37405BDC)
    assert mmh3.hash("abc", 0) == u32_to_s32(0xB3DD93FA)
    assert mmh3.hash(
        "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq", 0
    ) == u32_to_s32(0xEE925B90)
    assert mmh3.hash(
        "The quick brown fox jumps over the lazy dog", 0x9747B28C
    ) == u32_to_s32(0x2FA826CD)


def test_hash_from_buffer() -> None:
    mview = memoryview(b"foo")
    assert mmh3.hash_from_buffer(mview) == -156908512
    assert mmh3.hash_from_buffer(mview, signed=False) == 4138058784


def test_hash_bytes() -> None:
    assert mmh3.hash_bytes("foo") == b"aE\xf5\x01W\x86q\xe2\x87}\xba+\xe4\x87\xaf~"
    assert (
        mmh3.hash_bytes("foo", 0, True)
        == b"aE\xf5\x01W\x86q\xe2\x87}\xba+\xe4\x87\xaf~"
    )
    # Test vectors from https://github.com/PeterScott/murmur3/blob/master/test.c
    assert mmh3.hash_bytes("Hello, world!", 123, x64arch=False) == (
        0x9E37C886A41621625A1AACD761C9129E
    ).to_bytes(16, "little")
    assert mmh3.hash_bytes("", 123, x64arch=False) == (
        0x26F3E79926F3E79926F3E799FEDC5245
    ).to_bytes(16, "little")


def test_hash64() -> None:
    assert mmh3.hash64("foo") == (-2129773440516405919, 9128664383759220103)
    assert mmh3.hash64("foo", signed=False) == (
        16316970633193145697,
        9128664383759220103,
    )
    assert mmh3.hash64("The quick brown fox jumps over the lazy dog", 0x9747B28C) == (
        8325606756057297185,
        -484854449282476315,
    )
    assert mmh3.hash64(
        "The quick brown fox jumps over the lazy dog", 0x9747B28C, signed=False
    ) == (
        8325606756057297185,
        17961889624427075301,
    )
    assert mmh3.hash64("foo", signed=False, x64arch=True) == (
        16316970633193145697,
        9128664383759220103,
    )
    # Test vectors from https://github.com/PeterScott/murmur3/blob/master/test.c
    assert mmh3.hash64("Hello, world!", 123, signed=False, x64arch=False) == (
        0x5A1AACD761C9129E,
        0x9E37C886A4162162,
    )
    assert mmh3.hash64("", 123, False, False) == (
        0x26F3E799FEDC5245,
        0x26F3E79926F3E799,
    )


def test_hash128() -> None:
    assert mmh3.hash128("foo") == 168394135621993849475852668931176482145
    assert mmh3.hash128("foo", 42) == 215966891540331383248189432718888555506
    assert (
        mmh3.hash128("foo", 42, signed=False) == 215966891540331383248189432718888555506
    )
    assert (
        mmh3.hash128("foo", 42, signed=True) == -124315475380607080215185174712879655950
    )
    # Test vectors from https://github.com/PeterScott/murmur3/blob/master/test.c
    assert (
        mmh3.hash128("Hello, world!", 123, signed=False, x64arch=False)
        == 0x9E37C886A41621625A1AACD761C9129E
    )
    assert mmh3.hash128("", 123, False, False) == 0x26F3E79926F3E79926F3E799FEDC5245


def test_mmh3_32_digest() -> None:
    # Same Ian Boyd vectors as test_hash, checked against the little-endian
    # 4-byte digest form.
    assert mmh3.mmh3_32_digest(b"") == b"\0\0\0\0"
    assert mmh3.mmh3_32_digest(b"", 0) == b"\0\0\0\0"
    assert mmh3.mmh3_32_digest(b"\x21\x43\x65\x87", 0) == (0xF55B516B).to_bytes(
        4, "little"
    )
    assert mmh3.mmh3_32_digest(b"\x21\x43\x65\x87", u32_to_s32(0x5082EDEE)) == (
        0x2362F9DE
    ).to_bytes(4, "little")
    assert mmh3.mmh3_32_digest(b"\x21\x43\x65", 0) == (0x7E4A8634).to_bytes(4, "little")
    assert mmh3.mmh3_32_digest(b"\x21\x43", 0) == (0xA0F7B07A).to_bytes(4, "little")
    assert mmh3.mmh3_32_digest(b"\x21", 0) == (0x72661CF4).to_bytes(4, "little")
    assert mmh3.mmh3_32_digest(b"\xff\xff\xff\xff", 0) == (0x76293B50).to_bytes(
        4, "little"
    )
    assert mmh3.mmh3_32_digest(b"\x00\x00\x00\x00", 0) == (0x2362F9DE).to_bytes(
        4, "little"
    )
    assert mmh3.mmh3_32_digest(b"\x00\x00\x00", 0) == (0x85F0B427).to_bytes(4, "little")
    assert mmh3.mmh3_32_digest(b"\x00\x00", 0) == (0x30F4C306).to_bytes(4, "little")
    assert mmh3.mmh3_32_digest(b"\x00", 0) == (0x514E28B7).to_bytes(4, "little")
    assert mmh3.mmh3_32_digest(b"aaaa", 0x9747B28C) == (0x5A97808A).to_bytes(
        4, "little"
    )
    assert mmh3.mmh3_32_digest(b"aaa", 0x9747B28C) == (0x283E0130).to_bytes(4, "little")
    assert mmh3.mmh3_32_digest(b"aa", 0x9747B28C) == (0x5D211726).to_bytes(4, "little")
    assert mmh3.mmh3_32_digest(b"a", 0x9747B28C) == (0x7FA09EA6).to_bytes(4, "little")
    assert mmh3.mmh3_32_digest(b"abcd", 0x9747B28C) == (0xF0478627).to_bytes(
        4, "little"
    )
    assert mmh3.mmh3_32_digest(b"abc", 0x9747B28C) == (0xC84A62DD).to_bytes(4, "little")
    assert mmh3.mmh3_32_digest(b"ab", 0x9747B28C) == (0x74875592).to_bytes(4, "little")
    assert mmh3.mmh3_32_digest(b"a", 0x9747B28C) == (0x7FA09EA6).to_bytes(4, "little")
    assert mmh3.mmh3_32_digest(b"Hello, world!", 0x9747B28C) == (0x24884CBA).to_bytes(
        4, "little"
    )
    assert mmh3.mmh3_32_digest("ππππππππ".encode(), 0x9747B28C) == (
        0xD58063C1
    ).to_bytes(4, "little")
    assert mmh3.mmh3_32_digest(b"a" * 256, 0x9747B28C) == (0x37405BDC).to_bytes(
        4, "little"
    )
    assert mmh3.mmh3_32_digest(b"abc", 0) == (0xB3DD93FA).to_bytes(4, "little")
    assert mmh3.mmh3_32_digest(
        b"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq", 0
    ) == (0xEE925B90).to_bytes(4, "little")
    assert mmh3.mmh3_32_digest(
        b"The quick brown fox jumps over the lazy dog", 0x9747B28C
    ) == (0x2FA826CD).to_bytes(4, "little")
    # Buffer-protocol inputs: bytearray and (sliced) memoryview.
    assert mmh3.mmh3_32_digest(bytearray(b"aaaa"), 0x9747B28C) == (0x5A97808A).to_bytes(
        4, "little"
    )
    v = memoryview(b"aaaa")
    assert mmh3.mmh3_32_digest(v, 0x9747B28C) == (0x5A97808A).to_bytes(4, "little")
    assert mmh3.mmh3_32_digest(v[1:3], 0x9747B28C) == (0x5D211726).to_bytes(4, "little")


def test_mmh3_sintdigest() -> None:
    assert mmh3.mmh3_32_sintdigest(b"foo") == -156908512
    assert mmh3.mmh3_32_sintdigest(bytearray(b"foo")) == -156908512
    assert mmh3.mmh3_32_sintdigest(memoryview(b"foobar")[0:3]) == -156908512
    # Test vectors devised by Ian Boyd
    # https://stackoverflow.com/a/31929528
    assert mmh3.mmh3_32_sintdigest(b"", 0) == 0
    assert mmh3.mmh3_32_sintdigest(b"", 1) == 0x514E28B7
    assert mmh3.mmh3_32_sintdigest(b"", 0xFFFFFFFF) == u32_to_s32(0x81F16F39)
    assert mmh3.mmh3_32_sintdigest(b"\x21\x43\x65\x87", 0) == u32_to_s32(0xF55B516B)
    assert mmh3.mmh3_32_sintdigest(
        b"\x21\x43\x65\x87", u32_to_s32(0x5082EDEE)
    ) == u32_to_s32(0x2362F9DE)
    assert mmh3.mmh3_32_sintdigest(b"\x21\x43\x65", 0) == u32_to_s32(0x7E4A8634)
    assert mmh3.mmh3_32_sintdigest(b"\x21\x43", 0) == u32_to_s32(0xA0F7B07A)
    assert mmh3.mmh3_32_sintdigest(b"\x21", 0) == u32_to_s32(0x72661CF4)
    assert mmh3.mmh3_32_sintdigest(b"\xff\xff\xff\xff", 0) == u32_to_s32(0x76293B50)
    assert mmh3.mmh3_32_sintdigest(b"\x00\x00\x00\x00", 0) == u32_to_s32(0x2362F9DE)
    assert mmh3.mmh3_32_sintdigest(b"\x00\x00\x00", 0) == u32_to_s32(0x85F0B427)
    assert mmh3.mmh3_32_sintdigest(b"\x00\x00", 0) == u32_to_s32(0x30F4C306)
    assert mmh3.mmh3_32_sintdigest(b"\x00", 0) == u32_to_s32(0x514E28B7)
    assert mmh3.mmh3_32_sintdigest(b"aaaa", 0x9747B28C) == u32_to_s32(0x5A97808A)
    assert mmh3.mmh3_32_sintdigest(b"aaa", 0x9747B28C) == u32_to_s32(0x283E0130)
    assert mmh3.mmh3_32_sintdigest(b"aa", 0x9747B28C) == u32_to_s32(0x5D211726)
    assert mmh3.mmh3_32_sintdigest(b"a", 0x9747B28C) == u32_to_s32(0x7FA09EA6)
    assert mmh3.mmh3_32_sintdigest(b"abcd", 0x9747B28C) == u32_to_s32(0xF0478627)
    assert mmh3.mmh3_32_sintdigest(b"abc", 0x9747B28C) == u32_to_s32(0xC84A62DD)
    assert mmh3.mmh3_32_sintdigest(b"ab", 0x9747B28C) == u32_to_s32(0x74875592)
    assert mmh3.mmh3_32_sintdigest(b"a", 0x9747B28C) == u32_to_s32(0x7FA09EA6)
    assert mmh3.mmh3_32_sintdigest(b"Hello, world!", 0x9747B28C) == u32_to_s32(
        0x24884CBA
    )
    assert mmh3.mmh3_32_sintdigest("ππππππππ".encode(), 0x9747B28C) == u32_to_s32(
        0xD58063C1
    )
    assert mmh3.mmh3_32_sintdigest(b"a" * 256, 0x9747B28C) == u32_to_s32(0x37405BDC)
    assert mmh3.mmh3_32_sintdigest(b"abc", 0) == u32_to_s32(0xB3DD93FA)
    assert mmh3.mmh3_32_sintdigest(
        b"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq", 0
    ) == u32_to_s32(0xEE925B90)
    assert mmh3.mmh3_32_sintdigest(
        b"The quick brown fox jumps over the lazy dog", 0x9747B28C
    ) == u32_to_s32(0x2FA826CD)


def test_mmh3_uintdigest() -> None:
    assert mmh3.mmh3_32_uintdigest(b"foo") == 4138058784
    assert mmh3.mmh3_32_uintdigest(bytearray(b"foo")) == 4138058784
    assert mmh3.mmh3_32_uintdigest(memoryview(b"foobar")[0:3]) == 4138058784
    # Test vectors devised by Ian Boyd
    # https://stackoverflow.com/a/31929528
    assert mmh3.mmh3_32_uintdigest(b"") == 0
    assert mmh3.mmh3_32_uintdigest(b"", 0) == 0
    assert mmh3.mmh3_32_uintdigest(b"", 1) == 0x514E28B7
    assert mmh3.mmh3_32_uintdigest(b"", 0xFFFFFFFF) == 0x81F16F39
    assert mmh3.mmh3_32_uintdigest(b"\x21\x43\x65\x87", 0) == 0xF55B516B
    assert mmh3.mmh3_32_uintdigest(b"\x21\x43\x65\x87", 0x5082EDEE) == 0x2362F9DE
    assert mmh3.mmh3_32_uintdigest(b"\x21\x43\x65", 0) == 0x7E4A8634
    assert mmh3.mmh3_32_uintdigest(b"\x21\x43", 0) == 0xA0F7B07A
    assert mmh3.mmh3_32_uintdigest(b"\x21", 0) == 0x72661CF4
    assert mmh3.mmh3_32_uintdigest(b"\xff\xff\xff\xff", 0) == 0x76293B50
    assert mmh3.mmh3_32_uintdigest(b"\x00\x00\x00\x00", 0) == 0x2362F9DE
    assert mmh3.mmh3_32_uintdigest(b"\x00\x00\x00", 0) == 0x85F0B427
    assert mmh3.mmh3_32_uintdigest(b"\x00\x00", 0) == 0x30F4C306
    assert mmh3.mmh3_32_uintdigest(b"\x00", 0) == 0x514E28B7
    assert mmh3.mmh3_32_uintdigest(b"aaaa", 0x9747B28C) == 0x5A97808A
    assert mmh3.mmh3_32_uintdigest(b"aaa", 0x9747B28C) == 0x283E0130
    assert mmh3.mmh3_32_uintdigest(b"aa", 0x9747B28C) == 0x5D211726
    assert mmh3.mmh3_32_uintdigest(b"a", 0x9747B28C) == 0x7FA09EA6
    assert mmh3.mmh3_32_uintdigest(b"abcd", 0x9747B28C) == 0xF0478627
    assert mmh3.mmh3_32_uintdigest(b"abc", 0x9747B28C) == 0xC84A62DD
    assert mmh3.mmh3_32_uintdigest(b"ab", 0x9747B28C) == 0x74875592
    assert mmh3.mmh3_32_uintdigest(b"a", 0x9747B28C) == 0x7FA09EA6
    assert mmh3.mmh3_32_uintdigest(b"Hello, world!", 0x9747B28C) == 0x24884CBA
    assert mmh3.mmh3_32_uintdigest("ππππππππ".encode(), 0x9747B28C) == 0xD58063C1
    assert mmh3.mmh3_32_uintdigest(b"a" * 256, 0x9747B28C) == 0x37405BDC
    assert mmh3.mmh3_32_uintdigest(b"abc", 0) == 0xB3DD93FA
    assert (
        mmh3.mmh3_32_uintdigest(
            b"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq", 0
        )
        == 0xEE925B90
    )
    assert (
        mmh3.mmh3_32_uintdigest(
            b"The quick brown fox jumps over the lazy dog", 0x9747B28C
        )
        == 0x2FA826CD
    )
    # NOTE(review): the following assert is an exact duplicate of the one
    # above — candidate for removal.
    assert (
        mmh3.mmh3_32_uintdigest(
            b"The quick brown fox jumps over the lazy dog", 0x9747B28C
        )
        == 0x2FA826CD
    )


def test_mmh3_x64_128_digest() -> None:
    assert (
        mmh3.mmh3_x64_128_digest(b"foo")
        == b"aE\xf5\x01W\x86q\xe2\x87}\xba+\xe4\x87\xaf~"
    )
    assert (
        mmh3.mmh3_x64_128_digest(
            b"The quick brown fox jumps over the lazy dog", 0x9747B28C
        )
        == b"!1c\xd2;\x7f\x8as\xe5\x16\xc0~rsE\xf9"
    )
    # A memoryview slice must reflect mutations of the underlying buffer.
    v = bytearray(b"bar boo bar")
    mv = memoryview(v)
    v[4] = ord("f")
    assert (
        mmh3.mmh3_x64_128_digest(mv[4:7])
        == b"aE\xf5\x01W\x86q\xe2\x87}\xba+\xe4\x87\xaf~"
    )


def test_mmh3_x64_128_sintdigest() -> None:
    assert mmh3.mmh3_x64_128_sintdigest(b"") == 0
    assert (
        mmh3.mmh3_x64_128_sintdigest(
            b"The quick brown fox jumps over the lazy dog", 0x9747B28C
        )
        == -8943985938913228316176695348732677855
    )


def test_mmh3_x64_128_uintdigest() -> None:
    assert mmh3.mmh3_x64_128_uintdigest(b"") == 0
    assert (
        mmh3.mmh3_x64_128_uintdigest(b"foo", 42)
        == 215966891540331383248189432718888555506
    )


def test_mmh3_x64_128_stupledigest() -> None:
    assert mmh3.mmh3_x64_128_stupledigest(b"") == (0, 0)
    assert
mmh3.mmh3_x64_128_stupledigest( memoryview(b"The quick brown fox jumps over the lazy dog"), 0x9747B28C ) == ( 8325606756057297185, -484854449282476315, ) def test_mmh3_x64_128_utupledigest() -> None: assert mmh3.mmh3_x64_128_utupledigest(b"") == (0, 0) assert mmh3.mmh3_x64_128_utupledigest(memoryview(b"foo")) == ( 16316970633193145697, 9128664383759220103, ) def test_mmh3_x86_128_digest() -> None: assert mmh3.mmh3_x86_128_digest(b"", 123) == ( 0x26F3E79926F3E79926F3E799FEDC5245 ).to_bytes(16, "little") assert mmh3.mmh3_x86_128_digest(b"Hello, world!", 123) == ( 0x9E37C886A41621625A1AACD761C9129E ).to_bytes(16, "little") assert mmh3.mmh3_x86_128_digest(bytearray(b"Hello, world!"), 123) == ( 0x9E37C886A41621625A1AACD761C9129E ).to_bytes(16, "little") v = bytearray(b"hello, world!!!") mv = memoryview(v) v[0] = ord("H") assert mmh3.mmh3_x86_128_digest(mv[0:13], 123) == ( 0x9E37C886A41621625A1AACD761C9129E ).to_bytes(16, "little") def test_mmh3_x86_128_sintdigest() -> None: assert mmh3.mmh3_x64_128_sintdigest(b"") == 0 assert ( mmh3.mmh3_x64_128_sintdigest( b"The quick brown fox jumps over the lazy dog", 0x9747B28C ) == -8943985938913228316176695348732677855 ) def test_mmh3_x86_128_uintdigest() -> None: assert mmh3.mmh3_x64_128_uintdigest(b"", 0) == 0 # Test vector from https://github.com/PeterScott/murmur3/blob/master/test.c assert ( mmh3.mmh3_x86_128_uintdigest(b"Hello, world!", 123) == 0x9E37C886A41621625A1AACD761C9129E ) def test_mmh3_x86_128_stupledigest() -> None: assert mmh3.mmh3_x86_128_stupledigest(b"", 0) == (0, 0) assert mmh3.mmh3_x86_128_stupledigest( memoryview(b"The quick brown fox jumps over the lazy dog"), 0x9747B28C ) == ( 5528275682885686622, -3623575540584727908, ) def test_mmh3_x86_128_utupledigest() -> None: assert mmh3.mmh3_x86_128_utupledigest(b"", 0) == (0, 0) # Test vector from https://github.com/PeterScott/murmur3/blob/master/test.c assert mmh3.mmh3_x86_128_utupledigest(memoryview(b"Hello, world!"), 123) == ( 0x5A1AACD761C9129E, 
0x9E37C886A4162162, ) def test_64bit() -> None: if sys.maxsize < (1 << 32): # Skip this test under 32-bit environments return a = bytes(2**32 + 1) assert mmh3.hash(a) == -1710109261 assert ( mmh3.hash_bytes(a) == b"\x821\x93\x0c\xe7\xa8\x02\x9d\xe5 \xa6\xf9\xeb8\xd6\x0e" ) # from hex string "0xff00de" to integer def hex_to_int(hex_str: str) -> int: return int(hex_str, 16) ================================================ FILE: tests/test_mmh3_hasher.py ================================================ # pylint: disable=missing-module-docstring,missing-function-docstring import mmh3 from helper import u32_to_s32 def test_mmh3_32_digest() -> None: hasher = mmh3.mmh3_32() hasher.update(b"") assert hasher.digest() == b"\x00\x00\x00\x00" # Test vectors devised by Ian Boyd # https://stackoverflow.com/a/31929528 hasher = mmh3.mmh3_32(seed=0x9747B28C) hasher.update(b"Hello, world!") assert hasher.digest() == b"\xba\x4c\x88\x24" hasher = mmh3.mmh3_32(seed=0x9747B28C) hasher.update(b"Hello,") hasher.update(b" world!") assert hasher.digest() == b"\xba\x4c\x88\x24" hasher = mmh3.mmh3_32(b"", 0x9747B28C) hasher.update(b"Hello,") hasher.update(b" world!") assert hasher.digest() == b"\xba\x4c\x88\x24" hasher = mmh3.mmh3_32(b"Hello,", 0x9747B28C) hasher.update(b" world!") assert hasher.digest() == b"\xba\x4c\x88\x24" hasher = mmh3.mmh3_32(b"Hello,", seed=0x9747B28C) hasher.update(b" world!") assert hasher.digest() == b"\xba\x4c\x88\x24" def test_mmh3_32_sintdigest() -> None: hasher = mmh3.mmh3_32() hasher.update(b"foo") assert hasher.sintdigest() == -156908512 # Test vectors devised by Ian Boyd # https://stackoverflow.com/a/31929528 hasher = mmh3.mmh3_32() hasher.update(b"") assert hasher.sintdigest() == 0 hasher = mmh3.mmh3_32(seed=1) hasher.update(b"") assert hasher.sintdigest() == 0x514E28B7 hasher = mmh3.mmh3_32() hasher.update(b"\x21\x43") hasher.update(b"\x65") assert hasher.sintdigest() == u32_to_s32(0x7E4A8634) hasher = mmh3.mmh3_32() hasher.update(b"\x21\x43\x65\x87") 
    assert hasher.sintdigest() == u32_to_s32(0xF55B516B)

    # Splitting the same input across updates must not change the digest.
    hasher = mmh3.mmh3_32()
    hasher.update(b"\x21\x43")
    hasher.update(b"\x65\x87")
    assert hasher.sintdigest() == u32_to_s32(0xF55B516B)

    hasher = mmh3.mmh3_32(seed=0x9747B28C)
    hasher.update(b"Hello, world!")
    assert hasher.sintdigest() == u32_to_s32(0x24884CBA)

    hasher = mmh3.mmh3_32(seed=0x9747B28C)
    hasher.update(b"Hello,")
    hasher.update(b" world!")
    assert hasher.sintdigest() == u32_to_s32(0x24884CBA)

    hasher = mmh3.mmh3_32(seed=0x9747B28C)
    hasher.update(b"The quick brown fo")
    hasher.update(b"x jumps over the lazy dog")
    assert hasher.sintdigest() == u32_to_s32(0x2FA826CD)


def test_mmh3_32_uintdigest() -> None:
    hasher = mmh3.mmh3_32()
    hasher.update(b"foo")
    assert hasher.uintdigest() == 4138058784

    # Test vectors devised by Ian Boyd
    # https://stackoverflow.com/a/31929528
    hasher = mmh3.mmh3_32()
    hasher.update(b"")
    assert hasher.uintdigest() == 0

    hasher = mmh3.mmh3_32(seed=1)
    hasher.update(b"")
    assert hasher.uintdigest() == 0x514E28B7

    hasher = mmh3.mmh3_32()
    hasher.update(b"\x21\x43")
    hasher.update(b"\x65")
    assert hasher.uintdigest() == 0x7E4A8634

    hasher = mmh3.mmh3_32()
    hasher.update(b"\x21\x43\x65\x87")
    assert hasher.uintdigest() == 0xF55B516B

    hasher = mmh3.mmh3_32()
    hasher.update(b"\x21\x43")
    hasher.update(b"\x65\x87")
    assert hasher.uintdigest() == 0xF55B516B

    hasher = mmh3.mmh3_32(seed=0x9747B28C)
    hasher.update(b"Hello, world!")
    assert hasher.uintdigest() == 0x24884CBA

    hasher = mmh3.mmh3_32(seed=0x9747B28C)
    hasher.update(b"Hello,")
    hasher.update(b" world!")
    assert hasher.uintdigest() == 0x24884CBA

    hasher = mmh3.mmh3_32(seed=0x9747B28C)
    hasher.update(b"The quick brown fo")
    hasher.update(b"x jumps over the lazy dog")
    assert hasher.uintdigest() == 0x2FA826CD


def test_mmh3_32_copy() -> None:
    # A copied hasher must share history up to the copy point but evolve
    # independently afterwards.
    hasher = mmh3.mmh3_32(seed=0x9747B28C)
    hasher.update(b"The quick brown fox")
    hasher2 = hasher.copy()
    hasher.update(b" jumps over the lazy dog")
    assert hasher.uintdigest() == 0x2FA826CD
    hasher2.update(b" jumps over the lazy dog")
    assert hasher2.uintdigest() == 0x2FA826CD


def test_mmh3_x64_128_basic_ops() -> None:
    hasher = mmh3.mmh3_x64_128()
    assert hasher.digest_size == 16
    assert hasher.block_size == 32
    assert hasher.name == "mmh3_x64_128"


def test_mmh3_x64_128_digest() -> None:
    hasher = mmh3.mmh3_x64_128()
    hasher.update(b"foo")
    assert hasher.digest() == b"aE\xf5\x01W\x86q\xe2\x87}\xba+\xe4\x87\xaf~"

    hasher = mmh3.mmh3_x64_128(seed=0x9747B28C)
    hasher.update(b"The quick brown fox jumps over the lazy dog")
    assert hasher.digest() == b"!1c\xd2;\x7f\x8as\xe5\x16\xc0~rsE\xf9"

    # Constructor-supplied data must be equivalent to an initial update().
    hasher = mmh3.mmh3_x64_128(b"", 0x9747B28C)
    hasher.update(b"The quick brown fox jumps over the lazy dog")
    assert hasher.digest() == b"!1c\xd2;\x7f\x8as\xe5\x16\xc0~rsE\xf9"

    hasher = mmh3.mmh3_x64_128(b"The quick brown ", seed=0x9747B28C)
    hasher.update(b"fox jumps over the lazy dog")
    assert hasher.digest() == b"!1c\xd2;\x7f\x8as\xe5\x16\xc0~rsE\xf9"

    hasher = mmh3.mmh3_x64_128(b"The quick brown ", 0x9747B28C)
    hasher.update(b"fox jumps over the lazy dog")
    assert hasher.digest() == b"!1c\xd2;\x7f\x8as\xe5\x16\xc0~rsE\xf9"


def test_mmh3_x64_128_sintdigest() -> None:
    hasher = mmh3.mmh3_x64_128()
    hasher.update(b"")
    assert hasher.sintdigest() == 0

    hasher = mmh3.mmh3_x64_128(seed=0x9747B28C)
    hasher.update(b"The quick brown fox jumps over the lazy dog")
    assert hasher.sintdigest() == -8943985938913228316176695348732677855

    hasher = mmh3.mmh3_x64_128(seed=0x9747B28C)
    hasher.update(b"The quick brown fox j")
    hasher.update(b"umps over the lazy dog")
    assert hasher.sintdigest() == -8943985938913228316176695348732677855


def test_mmh3_x64_128_uintdigest() -> None:
    hasher = mmh3.mmh3_x64_128()
    hasher.update(b"")
    assert hasher.uintdigest() == 0

    hasher = mmh3.mmh3_x64_128(seed=1)
    hasher.update(b"")
    assert hasher.uintdigest() == 108177238965372658051732455265379769525

    hasher = mmh3.mmh3_x64_128()
    hasher.update(b"foo")
    assert hasher.uintdigest() == 168394135621993849475852668931176482145

    hasher = mmh3.mmh3_x64_128()
    hasher.update(b"fo")
    hasher.update(b"o")
    assert hasher.uintdigest() == 168394135621993849475852668931176482145

    hasher = mmh3.mmh3_x64_128()
    hasher.update(b"fooo")
    assert hasher.uintdigest() == 93757880664175803030724836966881520758

    hasher = mmh3.mmh3_x64_128()
    hasher.update(b"fooofooo")
    assert hasher.uintdigest() == 211983152696995059280678248292944636041

    hasher = mmh3.mmh3_x64_128()
    hasher.update(b"fooo")
    hasher.update(b"fooo")
    assert hasher.uintdigest() == 211983152696995059280678248292944636041

    hasher = mmh3.mmh3_x64_128()
    hasher.update(b"fooofoooo")
    assert hasher.uintdigest() == 338423359992422647011971677127905553798

    hasher = mmh3.mmh3_x64_128()
    hasher.update(b"fooo")
    hasher.update(b"foooo")
    assert hasher.uintdigest() == 338423359992422647011971677127905553798

    hasher = mmh3.mmh3_x64_128(seed=0x9747B28C)
    hasher.update(b"The quick brown fox jumps over the lazy dog")
    assert hasher.uintdigest() == 331338380982025235147197912083035533601

    # Split points below exercise the internal 16-byte block buffering at
    # several offsets.
    hasher = mmh3.mmh3_x64_128(seed=0x9747B28C)
    hasher.update(b"T")
    hasher.update(b"he quick brown fox jumps over the lazy dog")
    assert hasher.uintdigest() == 331338380982025235147197912083035533601

    hasher = mmh3.mmh3_x64_128(seed=0x9747B28C)
    hasher.update(b"The quic")  # 8 bytes
    hasher.update(b"k brown fox jumps over the lazy dog")
    assert hasher.uintdigest() == 331338380982025235147197912083035533601

    hasher = mmh3.mmh3_x64_128(seed=0x9747B28C)
    hasher.update(b"The quick")
    hasher.update(b" brown fox jumps over the lazy dog")
    assert hasher.uintdigest() == 331338380982025235147197912083035533601


def test_mmh3_x64_128_stupledigest() -> None:
    hasher = mmh3.mmh3_x64_128()
    hasher.update(b"")
    assert hasher.stupledigest() == (0, 0)

    hasher = mmh3.mmh3_x64_128(seed=0x9747B28C)
    hasher.update(b"The quick brown fox jumps over the lazy dog")
    assert hasher.stupledigest() == (8325606756057297185, -484854449282476315)

    hasher = mmh3.mmh3_x64_128(seed=0x9747B28C)
    hasher.update(b"The quic")
    hasher.update(b"k brown fox jumps over the lazy dog")
    assert hasher.stupledigest() == (8325606756057297185, -484854449282476315)


def test_mmh3_x64_128_utupledigest() -> None:
    hasher = mmh3.mmh3_x64_128()
    hasher.update(b"")
    assert hasher.utupledigest() == (0, 0)

    hasher = mmh3.mmh3_x64_128(seed=0x9747B28C)
    hasher.update(b"The quick brown fox jumps over the lazy dog")
    assert hasher.utupledigest() == (8325606756057297185, 17961889624427075301)

    hasher = mmh3.mmh3_x64_128(seed=0x9747B28C)
    hasher.update(b"The quic")
    hasher.update(b"k brown fox jumps over the lazy dog")
    assert hasher.utupledigest() == (8325606756057297185, 17961889624427075301)


def test_mmh3_x64_128_copy() -> None:
    hasher = mmh3.mmh3_x64_128(seed=0x9747B28C)
    hasher.update(b"The quick brown fox")
    hasher2 = hasher.copy()
    hasher.update(b" jumps over the lazy dog")
    assert hasher.digest() == b"!1c\xd2;\x7f\x8as\xe5\x16\xc0~rsE\xf9"
    hasher2.update(b" jumps over the lazy dog")
    assert hasher2.digest() == b"!1c\xd2;\x7f\x8as\xe5\x16\xc0~rsE\xf9"


def test_mmh3_x86_128_basic_ops() -> None:
    hasher = mmh3.mmh3_x86_128()
    assert hasher.digest_size == 16
    assert hasher.block_size == 32
    assert hasher.name == "mmh3_x86_128"


def test_mmh3_x86_128_digest() -> None:
    hasher = mmh3.mmh3_x86_128()
    hasher.update(b"")
    assert (
        hasher.digest()
        == b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    )

    hasher = mmh3.mmh3_x86_128(seed=1)
    hasher.update(b"")
    assert hasher.digest() == b"\xec\xad\xc4\x88\xb9\x01\xd2T\xb9\x01\xd2T\xb9\x01\xd2T"

    hasher = mmh3.mmh3_x86_128()
    hasher.update(b"foo")
    assert hasher.digest() == b"%\x1b|We%\xb6`e%\xb6`e%\xb6`"

    hasher = mmh3.mmh3_x86_128(seed=0x9747B28C)
    hasher.update(b"The quick brown")  # 15 bytes
    assert (
        hasher.digest() == b"2\xc3\n\xdaW\xc2\xcb\xa9\xc4\xbe\x12\xb9\xdc\x01\xe1\x8e"
    )

    hasher = mmh3.mmh3_x86_128(seed=0x9747B28C)
    hasher.update(b"The quick brown ")  # 16 bytes
    assert hasher.digest() == b"u\xb6\xf9\x07\xf5|\x93,\x0e\xf5\xf1\xf0k\x98\x83\x19"

    hasher = mmh3.mmh3_x86_128(seed=0x9747B28C)
    hasher.update(b"The quick brown")  # 15 bytes
    hasher.update(b" fox jumps over the lazy dog")
    assert hasher.digest() == b"^\xd5\xd4\x8aqa\xb8L\x9c:\xa7\x8e>y\xb6\xcd"

    hasher = mmh3.mmh3_x86_128(seed=0x9747B28C)
    hasher.update(b"The quick brown ")  # 16 bytes
    hasher.update(b"fox jumps over the lazy dog")
    assert hasher.digest() == b"^\xd5\xd4\x8aqa\xb8L\x9c:\xa7\x8e>y\xb6\xcd"

    hasher = mmh3.mmh3_x86_128(seed=0x9747B28C)
    hasher.update(b"The quick brown fox jumps over the lazy dog")
    assert hasher.digest() == b"^\xd5\xd4\x8aqa\xb8L\x9c:\xa7\x8e>y\xb6\xcd"

    hasher = mmh3.mmh3_x86_128(seed=0x9747B28C)
    hasher.update(b"The quick brown fox ju")
    hasher.update(b"mps ove")
    hasher.update(b"r the la")
    hasher.update(b"zy dog")
    assert hasher.digest() == b"^\xd5\xd4\x8aqa\xb8L\x9c:\xa7\x8e>y\xb6\xcd"

    hasher = mmh3.mmh3_x86_128(b"", 0x9747B28C)
    hasher.update(b"The quick brown fox ju")
    hasher.update(b"mps ove")
    hasher.update(b"r the la")
    hasher.update(b"zy dog")
    assert hasher.digest() == b"^\xd5\xd4\x8aqa\xb8L\x9c:\xa7\x8e>y\xb6\xcd"

    hasher = mmh3.mmh3_x86_128(b"The quick brown fox ju", seed=0x9747B28C)
    hasher.update(b"mps ove")
    hasher.update(b"r the la")
    hasher.update(b"zy dog")
    assert hasher.digest() == b"^\xd5\xd4\x8aqa\xb8L\x9c:\xa7\x8e>y\xb6\xcd"

    hasher = mmh3.mmh3_x86_128(b"The quick brown fox ju", 0x9747B28C)
    hasher.update(b"mps ove")
    hasher.update(b"r the la")
    hasher.update(b"zy dog")
    assert hasher.digest() == b"^\xd5\xd4\x8aqa\xb8L\x9c:\xa7\x8e>y\xb6\xcd"


def test_mmh3_x86_128_sintdigest() -> None:
    hasher = mmh3.mmh3_x86_128(seed=0x9747B28C)
    hasher.update(b"The quick brown fox jumps over the lazy dog")
    assert hasher.sintdigest() == -66843170628920214366208380873156012706


def test_mmh3_x86_128_uintdigest() -> None:
    hasher = mmh3.mmh3_x86_128(seed=0x9747B28C)
    hasher.update(b"The quick brown fox jumps over the lazy dog")
    assert hasher.uintdigest() == 273439196292018249097166226558612198750


def test_mmh3_x86_128_stupledigest() -> None:
    hasher = mmh3.mmh3_x86_128(seed=0x9747B28C)
    hasher.update(b"The quick brown fox jumps over the lazy dog")
    assert hasher.stupledigest() == (5528275682885686622, -3623575540584727908)


def test_mmh3_x86_128_utupledigest() -> None:
    hasher = mmh3.mmh3_x86_128(seed=0x9747B28C)
    hasher.update(b"The quick brown fox jumps over the lazy dog")
    assert hasher.utupledigest() == (5528275682885686622, 14823168533124823708)


================================================
FILE: tox.ini
================================================
[tox]
requires = tox>=4
envlist = lint, type, py{310,311,312,313,314,314t}

[testenv]
description = run unit tests
commands_pre = uv pip install ".[test]"
commands = pytest {posargs}

[testenv:lint]
description = run linters with formatting
skip_install = true
allowlist_externals =
    find
    npx
commands_pre = uv pip install ".[lint]"
commands =
    ruff format .
    ruff check --fix .
    find ./src/mmh3 -name '*.[ch]' -exec clang-format -i {} +
    npx prettier --write .
    pylint --recursive=y .
    npx markdownlint --config .markdown-lint.yml \
        --ignore-path .gitignore **/*.md
    codespell
    actionlint

[testenv:type]
description = run type checks
commands_pre = uv pip install ".[test,type]"
commands = mypy --strict tests

[testenv:docs]
description = run documentation build
allowlist_externals = make
commands_pre = uv pip install ".[docs]"
commands =
    make -C docs clean
    make -C docs html

[testenv:build_cfiles]
allowlist_externals =
    find
    git
commands_pre = uv pip install ".[lint]"
commands =
    git submodule update --init
    python util/refresh.py
    find ./src/mmh3 -name '*.[ch]' -exec clang-format -i {} +

[testenv:benchmark]
description = run benchmarks
commands_pre = uv pip install ".[benchmark]"
commands = python benchmark/benchmark.py {posargs}

[testenv:plot]
description = plot benchmark results
commands_pre = uv pip install ".[benchmark,plot]"
commands = python benchmark/plot_graph.py {posargs}

================================================
FILE: util/FILE_HEADER
================================================
/***
 * This file is under MIT Hajime Senuma, just like other files.
 * See LICENSE for details.
* * It was originally written by Austin Appleby in C++ under the public domain, * but ported to PEP 7 C for Python 3.6 and later by the mmh3 project. * * Any issues should be reported to https://github.com/hajimes/mmh3/issues. * * The following is the original public domain notice by Austin Appleby. */ //----------------------------------------------------------------------------- // MurmurHash3 was written by Austin Appleby, and is placed in the public // domain. The author hereby disclaims copyright to this source code. ================================================ FILE: util/refresh.py ================================================ # pylint: disable=missing-function-docstring """A script to generate Murmurhash3 C files from the original C++ source.""" # For forward references from __future__ import annotations import os import re import textwrap from collections.abc import Callable ### # Simple classes to handle the transformation of the original code. # class MMH3Source: """A data class to represent the original source code of MurmurHash3. Lines to be retrieved are hard-coded, as the original code is effectively frozen. 
""" def __init__(self, code: str) -> None: self._code_lines = code.split("\n") @property def note_comment(self) -> str: return "\n".join(self._code_lines[4:8]) @property def header_include(self) -> str: return "\n".join(self._code_lines[9:10]) @property def macros(self) -> str: return "\n".join(self._code_lines[11:50]) @property def getblock_functions(self) -> str: return "\n".join(self._code_lines[50:64]) @property def finalization_mixes(self) -> str: return "\n".join(self._code_lines[64:91]) @property def body(self) -> str: return "\n".join(self._code_lines[91:336]) @property def finalization_x86_128(self) -> str: return "\n".join(self._code_lines[233:246]) @property def finalization_x64_128(self) -> str: return "\n".join(self._code_lines[318:329]) @property def constants_x86_128(self) -> str: return "\n".join(self._code_lines[160:164]) @property def constants_x64_128(self) -> str: return "\n".join(self._code_lines[263:265]) class MMH3Header: """A data class to represent the original header code of MurmurHash3. Lines to be retrieved are hard-coded, as the original code is effectively frozen. 
""" def __init__(self, code: str) -> None: self._code_lines = code.split("\n") @property def header_guards_begin(self) -> str: return "\n".join(self._code_lines[4:7]) @property def stdint(self) -> str: return "\n".join(self._code_lines[7:26]) @property def declarations(self) -> str: return "\n".join(self._code_lines[26:36]) @property def header_guards_end(self) -> str: return "\n".join(self._code_lines[36:37]) class MMH3CodeBuilder: """A builder class to generate the new MurmurHash3 C code.""" def __init__(self) -> None: self._code: list[tuple[str, list[Callable[[str], str]]]] = [] def add( self, subcode: str, transforms: list[Callable[[str], str]] | None = None ) -> MMH3CodeBuilder: if transforms is None: transforms = [] self._code.append((subcode, transforms)) return self def build(self) -> str: new_code = "" for subcode, transforms in self._code: for tr in transforms: subcode = tr(subcode) new_code += subcode + "\n\n" return new_code ### # The following functions are used to transform the original MurmurHash3 code. # def append_python_directives(subcode: str) -> str: """Append Python.h, as well as a macro definition to handle 64-bit data. Args: subcode (str): The code to be appended. Returns: str: The appended code. """ subcode += "\n\n" subcode += textwrap.dedent("""\ // To handle 64-bit data; see https://docs.python.org/3/c-api/arg.html #ifndef PY_SSIZE_T_CLEAN #define PY_SSIZE_T_CLEAN #endif #include """) return subcode def append_byteswap_header(subcode: str) -> str: """Append a header to the code that includes byteswap.h if the system is big endian. Args: subcode (str): The code to be appended. Returns: str: The appended code. """ subcode += "\n" subcode += textwrap.dedent("""\ #if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) #include #endif """) return subcode def introduce_py_ssize_t(subcode: str) -> str: """Use Py_ssize_t instead of int as the index type. Py_ssize_t is the type used by Python to represent the size of objects. 
    It is required to handle 64-bit data in Python extensions. See
    https://docs.python.org/3/c-api/intro.html#c.Py_ssize_t and
    https://peps.python.org/pep-0353/

    Args:
        subcode (str): The code to be transformed.

    Returns:
        str: The transformed code.
    """
    # Plain substring replacement: each pair is [original, replacement].
    # These patterns must match the vendored SMHasher source verbatim.
    transformations = [
        ["int len", "Py_ssize_t len"],
        ["const int nblocks", "const Py_ssize_t nblocks"],
        ["for(int i", "for(Py_ssize_t i"],
    ]

    for tr in transformations:
        subcode = subcode.replace(tr[0], tr[1])

    return subcode


def transform_getblocks(subcode: str) -> str:
    """Revise getblock functions so that they handle big endian and 64-bit data.

    Args:
        subcode (str): The code to be transformed.

    Returns:
        str: The transformed code.
    """
    # pylint: disable=invalid-name
    transformations = [
        ["FORCE_INLINE", "static FORCE_INLINE"],
        ["int i", "Py_ssize_t i"],
    ]

    for tr in transformations:
        subcode = subcode.replace(tr[0], tr[1])

    # Replacement body for each getblock function. The "\1" backreference
    # carries the width suffix captured below (e.g. "32"/"64") into the
    # matching bswap_NN call. NOTE: whitespace inside this literal becomes
    # the generated C indentation.
    BYTE_SWAP_IF_BIG_ENDIAN = textwrap.dedent("""\
        #if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
            return bswap_\\1(p[i]);
        #else
            return p[i];
        #endif
    """)

    # Rewrite every "getblock...{...}" function body wholesale with the
    # endian-aware version; DOTALL lets ".*?" span the multi-line body.
    subcode = re.sub(
        r"getblock(.*?)(\s\(.*?\{\n).*?\}",
        "getblock\\1\\2" + BYTE_SWAP_IF_BIG_ENDIAN + "}",
        subcode,
        flags=re.DOTALL | re.MULTILINE,
    )

    return subcode


def transform_finalization_mixes(subcode: str) -> str:
    """Revise the finalization operations in MurmurHash3.

    Args:
        subcode (str): The code to be transformed.

    Returns:
        str: The transformed code.
    """
    transformations = [
        ["FORCE_INLINE", "static FORCE_INLINE"],
        ["int i", "Py_ssize_t i"],
    ]

    for tr in transformations:
        subcode = subcode.replace(tr[0], tr[1])

    return subcode


def transform_x86_128_return(subcode: str) -> str:
    """Revise the return block of MurmurHash3_x86_128 so that it handles big
    endian.

    Args:
        subcode (str): The code to be transformed.

    Returns:
        str: The transformed code.
    """
    # pylint: disable=invalid-name
    # On big-endian targets the four 32-bit words are stored in swapped
    # pair order (h2,h1,h4,h3); "\1" re-emits the captured original
    # little-endian store sequence unchanged in the #else branch.
    BYTE_SWAP_IF_BIG_ENDIAN = textwrap.dedent("""\
        #if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
            ((uint32_t *)out)[0] = h2;
            ((uint32_t *)out)[1] = h1;
            ((uint32_t *)out)[2] = h4;
            ((uint32_t *)out)[3] = h3;
        #else
            \\1
        #endif
    """)

    # Capture the whole original four-store sequence from "[0] = h1;" to
    # "[3] = h4;" and wrap it in the endian-conditional block above.
    subcode = re.sub(
        r"(\(\(uint32_t\*\)out\)\[0\] = h1;[\s\S]*\(\(uint32_t\*\)out\)\[3\] = h4;)",
        BYTE_SWAP_IF_BIG_ENDIAN,
        subcode,
        flags=re.DOTALL | re.MULTILINE,
    )

    return subcode


def expand_win_stdint_typedefs(subcode: str) -> str:
    """Delineate int type definitions for the older versions of the VS compiler.

    Args:
        subcode (str): The code to be transformed.

    Returns:
        str: The transformed code.
    """
    # pylint: disable=invalid-name
    MSC_STDINT_TYPEDEFS = textwrap.dedent("""\
        typedef signed __int8 int8_t;
        typedef signed __int32 int32_t;
        typedef signed __int64 int64_t;
        typedef unsigned __int8 uint8_t;
        typedef unsigned __int32 uint32_t;
        typedef unsigned __int64 uint64_t;
    """)

    # Replace the original span from the first "typedef unsigned char" line
    # through the trailing "uint64_t;" with the full signed+unsigned set.
    return re.sub(
        r"typedef unsigned char(.*)uint64_t;",
        MSC_STDINT_TYPEDEFS,
        subcode,
        flags=re.DOTALL,
    )


def append_mur_macros(subcode: str) -> str:
    """Append building blocks for multiply and rotate (MUR) operations.

    These functions are used by mmh3 hashers. In future updates, they may be
    also used by one-shot hash functions, although performance tests must be
    employed before such refactoring.

    Args:
        subcode (str): The code to be transformed.

    Returns:
        str: The transformed code.
    """
    subcode += "\n\n"
    subcode += textwrap.dedent("""\
        //-----------------------------------------------------------------------------
        // Building blocks for multiply and rotate (MUR) operations.
        // Names are taken from Google Guava's implementation
    """)
    subcode += "\n"
    subcode += textwrap.dedent("""\
        static FORCE_INLINE uint32_t
        mixK1(uint32_t k1)
        {
            const uint32_t c1 = 0xcc9e2d51;
            const uint32_t c2 = 0x1b873593;

            k1 *= c1;
            k1 = ROTL32(k1, 15);
            k1 *= c2;
            return k1;
        }
    """)
    subcode += textwrap.dedent("""\
        static FORCE_INLINE uint32_t
        mixH1(uint32_t h1, const uint32_t h2, const uint8_t shift,
              const uint32_t c1)
        {
            h1 = ROTL32(h1, shift);
            h1 += h2;
            h1 = h1 * 5 + c1;
            return h1;
        }
    """)
    subcode += textwrap.dedent("""\
        static FORCE_INLINE uint64_t
        mixK_x64_128(uint64_t k1, const uint8_t shift, const uint64_t c1,
                     const uint64_t c2)
        {
            k1 *= c1;
            k1 = ROTL64(k1, shift);
            k1 *= c2;
            return k1;
        }
    """)
    subcode += textwrap.dedent("""\
        static FORCE_INLINE uint64_t
        mixK1_x64_128(uint64_t k1)
        {
            const uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
            const uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);

            k1 *= c1;
            k1 = ROTL64(k1, 31);
            k1 *= c2;
            return k1;
        }
    """)
    subcode += textwrap.dedent("""\
        static FORCE_INLINE uint64_t
        mixK2_x64_128(uint64_t k2)
        {
            const uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
            const uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);

            k2 *= c2;
            k2 = ROTL64(k2, 33);
            k2 *= c1;
            return k2;
        }
    """)
    subcode += textwrap.dedent("""\
        static FORCE_INLINE uint64_t
        mixH_x64_128(uint64_t h1, uint64_t h2, const uint8_t shift,
                     const uint32_t c)
        {
            h1 = ROTL64(h1, shift);
            h1 += h2;
            h1 = h1 * 5 + c;
            return h1;
        }
    """)
    # NOTE(review): return type is uint64_t although the mix operates on a
    # uint32_t value — presumably an implicit widening on return; confirm
    # whether uint32_t was intended.
    subcode += textwrap.dedent("""\
        static FORCE_INLINE uint64_t
        mixK_x86_128(uint32_t k, const uint8_t shift, const uint32_t c1,
                     const uint32_t c2)
        {
            k *= c1;
            k = ROTL32(k, shift);
            k *= c2;
            return k;
        }
    """)

    return subcode


def generate_hasher_digest_x86_128_pre(subcode: str) -> str:
    """Generate the first part of the digest function for x86_128.

    Args:
        subcode (str): The constants in mmh3_x86_128.

    Returns:
        str: The first part of the digest function for x86_128.
    """
    hasher_digests = "\n\n"
    # Opening brace only; generate_hasher_digest_x86_128_main emits the
    # matching "}" so the two parts must be added in order by the caller.
    hasher_digests += textwrap.dedent("""\
        static FORCE_INLINE void
        digest_x86_128_impl(uint32_t h1, uint32_t h2, uint32_t h3, uint32_t h4,
                            const uint32_t k1, const uint32_t k2,
                            const uint32_t k3, const uint32_t k4,
                            const Py_ssize_t len, const char *out)
        {
    """)
    hasher_digests += subcode + "\n"

    return hasher_digests


def generate_hasher_digest_x86_128_main(subcode: str) -> str:
    """Generate the main part of the digest function for x86_128.

    Args:
        subcode (str): The finalization code in mmh3 x86_128.

    Returns:
        str: The main part of the digest function for x86_128.
    """
    hasher_digests = ""
    hasher_digests += textwrap.dedent("""\
        h1 ^= mixK_x86_128(k1, 15, c1, c2);
        h2 ^= mixK_x86_128(k2, 16, c2, c3);
        h3 ^= mixK_x86_128(k3, 17, c3, c4);
        h4 ^= mixK_x86_128(k4, 18, c4, c1);
    """)
    hasher_digests += subcode + "\n"
    hasher_digests += textwrap.dedent("""\
        #if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
            ((uint32_t *)out)[0] = bswap_32(h1);
            ((uint32_t *)out)[1] = bswap_32(h2);
            ((uint32_t *)out)[2] = bswap_32(h3);
            ((uint32_t *)out)[3] = bswap_32(h4);
        #else
            ((uint32_t *)out)[0] = h1;
            ((uint32_t *)out)[1] = h2;
            ((uint32_t *)out)[2] = h3;
            ((uint32_t *)out)[3] = h4;
        #endif
    """)
    # Closes the brace opened by generate_hasher_digest_x86_128_pre.
    hasher_digests += "\n}"

    return hasher_digests


def generate_hasher_digest_x64_128(subcode: str) -> str:
    """Generate the digest function for x64_128.

    Args:
        subcode (str): The finalization code in mmh3 x64_128.

    Returns:
        str: The digest function for x64_128.
    """
    hasher_digests = "\n\n"
    hasher_digests += textwrap.dedent("""\
        //-----------------------------------------------------------------------------
        // Finalization function
    """)
    hasher_digests += "\n"
    hasher_digests += textwrap.dedent("""\
        static FORCE_INLINE void
        digest_x64_128_impl(uint64_t h1, uint64_t h2, const uint64_t k1,
                            const uint64_t k2, const Py_ssize_t len,
                            const char *out)
        {
    """)
    hasher_digests += textwrap.dedent("""\
        h1 ^= mixK1_x64_128(k1);
        h2 ^= mixK2_x64_128(k2);
    """)
    hasher_digests += subcode + "\n"
    hasher_digests += textwrap.dedent("""\
        #if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
            ((uint64_t *)out)[0] = bswap_64(h1);
            ((uint64_t *)out)[1] = bswap_64(h2);
        #else
            ((uint64_t *)out)[0] = h1;
            ((uint64_t *)out)[1] = h2;
        #endif
    """)
    hasher_digests += "\n}"

    return hasher_digests


def fix_non_win_force_inline(subcode: str) -> str:
    """Fix the FORCE_INLINE macro so that it works on old GCC and RHEL.

    Based on a commit from Micha Gorelick (@mynameisfiber).
    https://github.com/hajimes/mmh3/pull/1

    Args:
        subcode (str): The code to be transformed.

    Returns:
        str: The transformed code.
    """
    # pylint: disable=invalid-name
    NON_WIN_FORCE_INLINE_ORIGINAL = (
        "#define FORCE_INLINE inline __attribute__((always_inline))"
    )

    NON_WIN_FORCE_INLINE_REVISED = textwrap.dedent("""\
        #if ((__GNUC__ > 4) || (__GNUC__ == 4 && GNUC_MINOR >= 4))
        /* gcc version >= 4.4 4.1 = RHEL 5, 4.4 = RHEL 6. Don't inline for RHEL 5 gcc
         * which is 4.1*/
        #define FORCE_INLINE inline __attribute__((always_inline))
        #else
        #define FORCE_INLINE
        #endif
    """)

    return subcode.replace(NON_WIN_FORCE_INLINE_ORIGINAL, NON_WIN_FORCE_INLINE_REVISED)


def force_inline_force_inline(subcode: str) -> str:
    """Force inline to use static FORCE_INLINE.

    Args:
        subcode (str): The code to be transformed.

    Returns:
        str: The transformed code.
    """
    # Only rewrites declarations whose "inline " starts a line (MULTILINE).
    return re.sub(r"^inline ", "static FORCE_INLINE ", subcode, flags=re.MULTILINE)


def lowercase_function_names(subcode: str) -> str:
    """Lowercase function names.

    Purely for style.

    Args:
        subcode (str): The code to be transformed.

    Returns:
        str: The transformed code.
    """
    function_names = [
        "MurmurHash3_x86_32",
        "MurmurHash3_x86_128",
        "MurmurHash3_x64_128",
    ]

    for fn in function_names:
        subcode = subcode.replace(fn, fn.lower())

    return subcode


if __name__ == "__main__":
    file_path = os.path.realpath(__file__)
    dir_path = os.path.dirname(file_path)

    # Inputs: the vendored SMHasher sources plus the shared license header.
    original_source_path = os.path.join(dir_path, "smhasher/src/MurmurHash3.cpp")
    original_header_path = os.path.join(dir_path, "smhasher/src/MurmurHash3.h")

    NEW_SOURCE_NAME = "murmurhash3.c"
    NEW_HEADER_NAME = "murmurhash3.h"
    FILE_HEADER_NAME = "FILE_HEADER"

    new_source_path = os.path.join(dir_path, "../src/mmh3", NEW_SOURCE_NAME)
    new_header_path = os.path.join(dir_path, "../src/mmh3", NEW_HEADER_NAME)
    file_header_path = os.path.join(dir_path, FILE_HEADER_NAME)

    with (
        open(original_source_path, encoding="utf-8") as source_file,
        open(original_header_path, encoding="utf-8") as header_file,
        open(file_header_path, encoding="utf-8") as file_header_file,
    ):
        source = MMH3Source(source_file.read())
        header = MMH3Header(header_file.read())
        file_header = file_header_file.read()

    # Assemble murmurhash3.c: each add() takes a section of the original
    # source and an optional pipeline of transformation callables.
    new_source_builder = MMH3CodeBuilder()
    new_source_builder.add(file_header)
    new_source_builder.add(source.note_comment)
    new_source_builder.add(source.header_include, [str.lower])
    new_source_builder.add(
        source.body,
        [introduce_py_ssize_t, transform_x86_128_return, lowercase_function_names],
    )

    # Assemble murmurhash3.h. Section order matters: e.g. the MUR macros
    # must precede the digest functions that the x86_128 parts emit in two
    # halves (pre, then main).
    new_header_builder = MMH3CodeBuilder()
    new_header_builder.add(file_header)
    new_header_builder.add(
        header.header_guards_begin,
        [append_python_directives, append_byteswap_header],
    )
    new_header_builder.add(
        header.stdint,
        [expand_win_stdint_typedefs],
    )
    new_header_builder.add(
        source.macros,
        [fix_non_win_force_inline, force_inline_force_inline],
    )
    new_header_builder.add(
        source.getblock_functions,
        [transform_getblocks],
    )
    new_header_builder.add(
        "",
        [append_mur_macros],
    )
    new_header_builder.add(
        source.finalization_mixes,
        [transform_finalization_mixes],
    )
    new_header_builder.add(
        source.finalization_x64_128,
        [generate_hasher_digest_x64_128],
    )
    new_header_builder.add(
        source.constants_x86_128,
        [generate_hasher_digest_x86_128_pre],
    )
    new_header_builder.add(
        source.finalization_x86_128,
        [generate_hasher_digest_x86_128_main],
    )
    new_header_builder.add(
        header.declarations,
        [lowercase_function_names, introduce_py_ssize_t],
    )
    new_header_builder.add(header.header_guards_end)

    with open(new_source_path, "w", encoding="utf-8") as f:
        f.write(new_source_builder.build())

    with open(new_header_path, "w", encoding="utf-8") as f:
        f.write(new_header_builder.build())