Full Code of rust-lang-nursery/packed_simd for AI

master d938e39bee9b cached

363 files

939.2 KB

281.6k tokens

747 symbols

1 requests

Download .txt

Showing preview only (1,028K chars total). Download the full file or copy to clipboard to get everything.

Repository: rust-lang-nursery/packed_simd
Branch: master
Commit: d938e39bee9b
Files: 363
Total size: 939.2 KB

Directory structure:
gitextract_ltzo2pap/

├── .appveyor.yml
├── .github/
│   └── workflows/
│       ├── benchmarks.yml
│       ├── ci.yml
│       ├── docs.yml
│       └── run-ci-script.yml
├── .gitignore
├── .travis.yml
├── Cargo.toml
├── LICENSE-APACHE
├── LICENSE-MIT
├── README.md
├── bors.toml
├── build.rs
├── ci/
│   ├── all.sh
│   ├── android-install-ndk.sh
│   ├── android-install-sdk.sh
│   ├── android-sysimage.sh
│   ├── benchmark.sh
│   ├── deploy_and_run_on_ios_simulator.rs
│   ├── docker/
│   │   ├── aarch64-linux-android/
│   │   │   └── Dockerfile
│   │   ├── aarch64-unknown-linux-gnu/
│   │   │   └── Dockerfile
│   │   ├── arm-unknown-linux-gnueabi/
│   │   │   └── Dockerfile
│   │   ├── arm-unknown-linux-gnueabihf/
│   │   │   └── Dockerfile
│   │   ├── armv7-linux-androideabi/
│   │   │   └── Dockerfile
│   │   ├── armv7-unknown-linux-gnueabihf/
│   │   │   └── Dockerfile
│   │   ├── i586-unknown-linux-gnu/
│   │   │   └── Dockerfile
│   │   ├── i686-unknown-linux-gnu/
│   │   │   └── Dockerfile
│   │   ├── mips-unknown-linux-gnu/
│   │   │   └── Dockerfile
│   │   ├── mips64-unknown-linux-gnuabi64/
│   │   │   └── Dockerfile
│   │   ├── mips64el-unknown-linux-gnuabi64/
│   │   │   └── Dockerfile
│   │   ├── mipsel-unknown-linux-musl/
│   │   │   └── Dockerfile
│   │   ├── powerpc-unknown-linux-gnu/
│   │   │   └── Dockerfile
│   │   ├── powerpc64-unknown-linux-gnu/
│   │   │   └── Dockerfile
│   │   ├── powerpc64le-unknown-linux-gnu/
│   │   │   └── Dockerfile
│   │   ├── s390x-unknown-linux-gnu/
│   │   │   └── Dockerfile
│   │   ├── sparc64-unknown-linux-gnu/
│   │   │   └── Dockerfile
│   │   ├── thumbv7neon-linux-androideabi/
│   │   │   └── Dockerfile
│   │   ├── thumbv7neon-unknown-linux-gnueabihf/
│   │   │   └── Dockerfile
│   │   ├── wasm32-unknown-unknown/
│   │   │   └── Dockerfile
│   │   ├── x86_64-linux-android/
│   │   │   └── Dockerfile
│   │   ├── x86_64-unknown-linux-gnu/
│   │   │   └── Dockerfile
│   │   └── x86_64-unknown-linux-gnu-emulated/
│   │       └── Dockerfile
│   ├── dox.sh
│   ├── linux-s390x.sh
│   ├── linux-sparc64.sh
│   ├── lld-shim.rs
│   ├── max_line_width.sh
│   ├── run-docker.sh
│   ├── run.sh
│   ├── run_examples.sh
│   ├── runtest-android.rs
│   ├── setup_benchmarks.sh
│   └── test-runner-linux
├── contributing.md
├── examples/
│   ├── Cargo.toml
│   ├── aobench/
│   │   ├── Cargo.toml
│   │   ├── benches/
│   │   │   ├── ambient_occlusion.rs
│   │   │   ├── isec_plane.rs
│   │   │   ├── isec_sphere.rs
│   │   │   ├── random.rs
│   │   │   └── scanlines.rs
│   │   ├── benchmark.sh
│   │   ├── build.rs
│   │   ├── readme.md
│   │   ├── rustfmt.toml
│   │   ├── src/
│   │   │   ├── ambient_occlusion.rs
│   │   │   ├── geometry/
│   │   │   │   ├── mod.rs
│   │   │   │   ├── plane.rs
│   │   │   │   ├── ray.rs
│   │   │   │   ├── rayxN.rs
│   │   │   │   ├── sphere.rs
│   │   │   │   ├── vec.rs
│   │   │   │   └── vecxN.rs
│   │   │   ├── image.rs
│   │   │   ├── intersection/
│   │   │   │   ├── mod.rs
│   │   │   │   ├── packet.rs
│   │   │   │   ├── ray_plane.rs
│   │   │   │   ├── ray_sphere.rs
│   │   │   │   └── single.rs
│   │   │   ├── ispc_.rs
│   │   │   ├── lib.rs
│   │   │   ├── main.rs
│   │   │   ├── random.rs
│   │   │   ├── scalar.rs
│   │   │   ├── scalar_parallel.rs
│   │   │   ├── scene/
│   │   │   │   ├── mod.rs
│   │   │   │   ├── random.rs
│   │   │   │   └── test.rs
│   │   │   ├── tiled.rs
│   │   │   ├── tiled_parallel.rs
│   │   │   ├── vector.rs
│   │   │   └── vector_parallel.rs
│   │   └── volta/
│   │       ├── .gitignore
│   │       └── ao.ispc
│   ├── dot_product/
│   │   ├── Cargo.toml
│   │   ├── readme.md
│   │   └── src/
│   │       ├── lib.rs
│   │       ├── scalar.rs
│   │       └── simd.rs
│   ├── fannkuch_redux/
│   │   ├── Cargo.toml
│   │   ├── readme.md
│   │   └── src/
│   │       ├── fannkuchredux-output.txt
│   │       ├── lib.rs
│   │       ├── main.rs
│   │       ├── scalar.rs
│   │       └── simd.rs
│   ├── mandelbrot/
│   │   ├── Cargo.toml
│   │   ├── benchmark.sh
│   │   ├── build.rs
│   │   ├── readme.md
│   │   ├── src/
│   │   │   ├── ispc_tasks.rs
│   │   │   ├── lib.rs
│   │   │   ├── main.rs
│   │   │   ├── mandelbrot-output.txt
│   │   │   ├── scalar_par.rs
│   │   │   └── simd_par.rs
│   │   └── volta/
│   │       └── mandelbrot.ispc
│   ├── matrix_inverse/
│   │   ├── Cargo.toml
│   │   ├── readme.md
│   │   └── src/
│   │       ├── lib.rs
│   │       ├── scalar.rs
│   │       └── simd.rs
│   ├── nbody/
│   │   ├── Cargo.toml
│   │   ├── benches/
│   │   │   └── algs.rs
│   │   ├── readme.md
│   │   └── src/
│   │       ├── lib.rs
│   │       ├── main.rs
│   │       ├── nbody-output.txt
│   │       ├── scalar.rs
│   │       └── simd.rs
│   ├── options_pricing/
│   │   ├── Cargo.toml
│   │   ├── benchmark.sh
│   │   ├── build.rs
│   │   ├── readme.md
│   │   ├── src/
│   │   │   ├── ispc_.rs
│   │   │   ├── lib.rs
│   │   │   ├── main.rs
│   │   │   ├── scalar.rs
│   │   │   ├── simd.rs
│   │   │   ├── simd_kernels.rs
│   │   │   ├── simd_par.rs
│   │   │   └── sum.rs
│   │   └── volta/
│   │       ├── options.ispc
│   │       └── options_defs.h
│   ├── rust-toolchain
│   ├── slice_sum/
│   │   ├── Cargo.toml
│   │   ├── readme.md
│   │   └── src/
│   │       └── main.rs
│   ├── spectral_norm/
│   │   ├── Cargo.toml
│   │   ├── readme.md
│   │   └── src/
│   │       ├── lib.rs
│   │       ├── main.rs
│   │       ├── scalar.rs
│   │       ├── simd.rs
│   │       └── spectralnorm-output.txt
│   ├── stencil/
│   │   ├── Cargo.toml
│   │   ├── benchmark.sh
│   │   ├── build.rs
│   │   ├── readme.md
│   │   ├── src/
│   │   │   ├── ispc_loops.rs
│   │   │   ├── lib.rs
│   │   │   ├── main.rs
│   │   │   ├── scalar.rs
│   │   │   ├── simd.rs
│   │   │   └── simd_par.rs
│   │   └── volta/
│   │       ├── .gitignore
│   │       ├── Makefile
│   │       ├── common.mk
│   │       ├── stencil.cpp
│   │       ├── stencil.ispc
│   │       ├── stencil_serial.cpp
│   │       ├── tasksys.cpp
│   │       └── timing.h
│   └── triangle_xform/
│       ├── Cargo.toml
│       ├── readme.md
│       └── src/
│           ├── lib.rs
│           ├── scalar.rs
│           └── simd.rs
├── micro_benchmarks/
│   ├── Cargo.toml
│   ├── benches/
│   │   └── mask_reductions.rs
│   └── rust-toolchain
├── perf-guide/
│   ├── .gitignore
│   ├── book.toml
│   └── src/
│       ├── SUMMARY.md
│       ├── ascii.css
│       ├── bound_checks.md
│       ├── float-math/
│       │   ├── approx.md
│       │   ├── fma.md
│       │   ├── fp.md
│       │   └── svml.md
│       ├── introduction.md
│       ├── prof/
│       │   ├── linux.md
│       │   ├── mca.md
│       │   └── profiling.md
│       ├── target-feature/
│       │   ├── attribute.md
│       │   ├── features.md
│       │   ├── inlining.md
│       │   ├── practice.md
│       │   ├── runtime.md
│       │   └── rustflags.md
│       └── vert-hor-ops.md
├── rust-toolchain
├── rustfmt.toml
├── src/
│   ├── api/
│   │   ├── bit_manip.rs
│   │   ├── bitmask.rs
│   │   ├── cast/
│   │   │   ├── macros.rs
│   │   │   ├── v128.rs
│   │   │   ├── v16.rs
│   │   │   ├── v256.rs
│   │   │   ├── v32.rs
│   │   │   ├── v512.rs
│   │   │   └── v64.rs
│   │   ├── cast.rs
│   │   ├── cmp/
│   │   │   ├── eq.rs
│   │   │   ├── ord.rs
│   │   │   ├── partial_eq.rs
│   │   │   ├── partial_ord.rs
│   │   │   └── vertical.rs
│   │   ├── cmp.rs
│   │   ├── default.rs
│   │   ├── fmt/
│   │   │   ├── binary.rs
│   │   │   ├── debug.rs
│   │   │   ├── lower_hex.rs
│   │   │   ├── octal.rs
│   │   │   └── upper_hex.rs
│   │   ├── fmt.rs
│   │   ├── from/
│   │   │   ├── from_array.rs
│   │   │   └── from_vector.rs
│   │   ├── from.rs
│   │   ├── hash.rs
│   │   ├── into_bits/
│   │   │   ├── arch_specific.rs
│   │   │   ├── macros.rs
│   │   │   ├── v128.rs
│   │   │   ├── v16.rs
│   │   │   ├── v256.rs
│   │   │   ├── v32.rs
│   │   │   ├── v512.rs
│   │   │   └── v64.rs
│   │   ├── into_bits.rs
│   │   ├── math/
│   │   │   ├── float/
│   │   │   │   ├── abs.rs
│   │   │   │   ├── consts.rs
│   │   │   │   ├── cos.rs
│   │   │   │   ├── exp.rs
│   │   │   │   ├── ln.rs
│   │   │   │   ├── mul_add.rs
│   │   │   │   ├── mul_adde.rs
│   │   │   │   ├── powf.rs
│   │   │   │   ├── recpre.rs
│   │   │   │   ├── rsqrte.rs
│   │   │   │   ├── sin.rs
│   │   │   │   ├── sqrt.rs
│   │   │   │   ├── sqrte.rs
│   │   │   │   └── tanh.rs
│   │   │   └── float.rs
│   │   ├── math.rs
│   │   ├── minimal/
│   │   │   ├── iuf.rs
│   │   │   ├── mask.rs
│   │   │   └── ptr.rs
│   │   ├── minimal.rs
│   │   ├── ops/
│   │   │   ├── scalar_arithmetic.rs
│   │   │   ├── scalar_bitwise.rs
│   │   │   ├── scalar_mask_bitwise.rs
│   │   │   ├── scalar_shifts.rs
│   │   │   ├── vector_arithmetic.rs
│   │   │   ├── vector_bitwise.rs
│   │   │   ├── vector_float_min_max.rs
│   │   │   ├── vector_int_min_max.rs
│   │   │   ├── vector_mask_bitwise.rs
│   │   │   ├── vector_neg.rs
│   │   │   ├── vector_rotates.rs
│   │   │   └── vector_shifts.rs
│   │   ├── ops.rs
│   │   ├── ptr/
│   │   │   └── gather_scatter.rs
│   │   ├── ptr.rs
│   │   ├── reductions/
│   │   │   ├── bitwise.rs
│   │   │   ├── float_arithmetic.rs
│   │   │   ├── integer_arithmetic.rs
│   │   │   ├── mask.rs
│   │   │   └── min_max.rs
│   │   ├── reductions.rs
│   │   ├── select.rs
│   │   ├── shuffle.rs
│   │   ├── shuffle1_dyn.rs
│   │   ├── slice/
│   │   │   ├── from_slice.rs
│   │   │   └── write_to_slice.rs
│   │   ├── slice.rs
│   │   └── swap_bytes.rs
│   ├── api.rs
│   ├── codegen/
│   │   ├── bit_manip.rs
│   │   ├── llvm.rs
│   │   ├── math/
│   │   │   ├── float/
│   │   │   │   ├── abs.rs
│   │   │   │   ├── cos.rs
│   │   │   │   ├── cos_pi.rs
│   │   │   │   ├── exp.rs
│   │   │   │   ├── ln.rs
│   │   │   │   ├── macros.rs
│   │   │   │   ├── mul_add.rs
│   │   │   │   ├── mul_adde.rs
│   │   │   │   ├── powf.rs
│   │   │   │   ├── sin.rs
│   │   │   │   ├── sin_cos_pi.rs
│   │   │   │   ├── sin_pi.rs
│   │   │   │   ├── sqrt.rs
│   │   │   │   ├── sqrte.rs
│   │   │   │   └── tanh.rs
│   │   │   └── float.rs
│   │   ├── math.rs
│   │   ├── pointer_sized_int.rs
│   │   ├── reductions/
│   │   │   ├── mask/
│   │   │   │   ├── aarch64.rs
│   │   │   │   ├── arm.rs
│   │   │   │   ├── fallback.rs
│   │   │   │   ├── fallback_impl.rs
│   │   │   │   ├── x86/
│   │   │   │   │   ├── avx.rs
│   │   │   │   │   ├── avx2.rs
│   │   │   │   │   ├── sse.rs
│   │   │   │   │   └── sse2.rs
│   │   │   │   └── x86.rs
│   │   │   └── mask.rs
│   │   ├── reductions.rs
│   │   ├── shuffle.rs
│   │   ├── shuffle1_dyn.rs
│   │   ├── swap_bytes.rs
│   │   ├── v128.rs
│   │   ├── v16.rs
│   │   ├── v256.rs
│   │   ├── v32.rs
│   │   ├── v512.rs
│   │   ├── v64.rs
│   │   ├── vPtr.rs
│   │   └── vSize.rs
│   ├── codegen.rs
│   ├── lib.rs
│   ├── masks.rs
│   ├── sealed.rs
│   ├── testing/
│   │   ├── macros.rs
│   │   └── utils.rs
│   ├── testing.rs
│   ├── v128.rs
│   ├── v16.rs
│   ├── v256.rs
│   ├── v32.rs
│   ├── v512.rs
│   ├── v64.rs
│   ├── vPtr.rs
│   └── vSize.rs
├── tests/
│   └── endianness.rs
└── verify/
    └── verify/
        ├── Cargo.toml
        ├── readme.md
        ├── rust-toolchain
        └── src/
            ├── api/
            │   ├── math/
            │   │   └── float/
            │   │       ├── mod.rs
            │   │       └── mul_add.rs
            │   ├── math.rs
            │   ├── ops/
            │   │   ├── vector_rotates/
            │   │   │   └── x86.rs
            │   │   └── vector_rotates.rs
            │   ├── ops.rs
            │   ├── reductions/
            │   │   ├── mask/
            │   │   │   ├── avx.rs
            │   │   │   ├── avx2.rs
            │   │   │   ├── sse.rs
            │   │   │   └── sse2.rs
            │   │   └── mask.rs
            │   └── reductions.rs
            ├── api.rs
            └── lib.rs

================================================
FILE CONTENTS
================================================

================================================
FILE: .appveyor.yml
================================================
matrix:
  allow_failures:
    # FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/72
    - TARGET: i686-pc-windows-msvc
    - TARGET: i686-pc-windows-gnu
    - TARGET: x86_64-pc-windows-gnu
  fast_finish: true

environment:
  matrix:
    - TARGET: x86_64-pc-windows-msvc
      MSYSTEM: MINGW64
      NOVERIFY: "1"
    - TARGET: x86_64-pc-windows-msvc
      MSYSTEM: MINGW64
      RUSTFLAGS: "-C target-feature=+sse4.2"
      NOVERIFY: "1"
    - TARGET: x86_64-pc-windows-msvc
      MSYSTEM: MINGW64
      RUSTFLAGS: "-C target-feature=+avx"
      NOVERIFY: "1"
    - TARGET: x86_64-pc-windows-msvc
      MSYSTEM: MINGW64
      RUSTFLAGS: "-C target-feature=+avx2"
      NOVERIFY: "1"

    - TARGET: i686-pc-windows-msvc
      MSYSTEM: MINGW32
      NOVERIFY: "1"
    - TARGET: i686-pc-windows-msvc
      MSYSTEM: MINGW32
      RUSTFLAGS: "-C target-feature=+sse4.2"
      NOVERIFY: "1"
    - TARGET: i686-pc-windows-msvc
      MSYSTEM: MINGW32
      RUSTFLAGS: "-C target-feature=+avx"
      NOVERIFY: "1"
    - TARGET: i686-pc-windows-msvc
      MSYSTEM: MINGW32
      RUSTFLAGS: "-C target-feature=+avx2"
      NOVERIFY: "1"

    - TARGET: x86_64-pc-windows-gnu
      MSYSTEM: MINGW64

    - TARGET: i686-pc-windows-gnu
      MSYSTEM: MINGW32
    - TARGET: x86_64-pc-windows-gnu
      MSYSTEM: MINGW64
install:
  - ps: if (ls -r . -fi "*.rs" | sls "`t") { throw "Found tab character" }
  - ps: Start-FileDownload "https://static.rust-lang.org/dist/rust-nightly-${env:TARGET}.exe" -FileName "rust-install.exe"
  - ps: .\rust-install.exe /VERYSILENT /NORESTART /DIR="C:\rust" | Out-Null
  - ps: $env:PATH="$env:PATH;C:\rust\bin"
  - set PATH=c:\msys64\%MSYSTEM%\bin;c:\msys64\usr\bin;%PATH%
  - rustc -vV
  - cargo -vV
build: false
test_script: bash -c "ci/run.sh"


================================================
FILE: .github/workflows/benchmarks.yml
================================================
name: benchmarks

on:
  push:
    branches:
      - master
  pull_request:
  workflow_dispatch:

jobs:
  x86_64-unknown-linux-gnu:
    uses: ./.github/workflows/run-ci-script.yml
    with:
      target: x86_64-unknown-linux-gnu
      setup_script: ci/setup_benchmarks.sh
      script: ci/benchmark.sh
      norun: 1
      verify: 1
      # FIXME: figure out how to add downloaded ispc to PATH
      # features: ispc
  x86_64-apple-darwin:
    uses: ./.github/workflows/run-ci-script.yml
    with:
      target: x86_64-apple-darwin
      runner: macos-latest
      setup_script: ci/setup_benchmarks.sh
      script: ci/benchmark.sh
      norun: 1
      verify: 1
      # FIXME: figure out how to add downloaded ispc to PATH
      # features: ispc


================================================
FILE: .github/workflows/ci.yml
================================================
name: ci

# trigger for all PRs and changes to master
on:
  push:
    branches:
      - master
  pull_request:

jobs:
  rustfmt:
    uses: ./.github/workflows/run-ci-script.yml
    with:
      script: ci/all.sh check_fmt || true
  x86_64-unknown-linux-android:
    uses: ./.github/workflows/run-ci-script.yml
    strategy:
      fail-fast: true
    with:
      target: x86_64-linux-android
  armv7-linux-androideabi:
    uses: ./.github/workflows/run-ci-script.yml
    strategy:
      fail-fast: true
    with:
      target: armv7-linux-androideabi
  aarch64-unknown-linux-android-NEON:
    uses: ./.github/workflows/run-ci-script.yml
    strategy:
      fail-fast: true
    with:
      target: aarch64-linux-android
      rustflags: -Ctarget-feature=+neon
  thumbv7neon-linux-androideabi:
    uses: ./.github/workflows/run-ci-script.yml
    strategy:
      fail-fast: false
    with:
      target: thumbv7neon-linux-androideabi
  i586-unknown-linux-gnu:
    uses: ./.github/workflows/run-ci-script.yml
    strategy:
      fail-fast: false
    with:
      target: i586-unknown-linux-gnu
      rustflags: -Crelocation-model=static
  i586-unknown-linux-gnu-SSE:
    uses: ./.github/workflows/run-ci-script.yml
    strategy:
      fail-fast: false
    with:
      target: i586-unknown-linux-gnu
      rustflags: -Crelocation-model=static -Ctarget-feature=+sse
  i586-unknown-linux-gnu-SSE2:
    uses: ./.github/workflows/run-ci-script.yml
    strategy:
      fail-fast: false
    with:
      target: i586-unknown-linux-gnu
      rustflags: -Crelocation-model=static -Ctarget-feature=+sse2
  i686-unknown-linux-gnu:
    uses: ./.github/workflows/run-ci-script.yml
    strategy:
      fail-fast: false
    with:
      target: i686-unknown-linux-gnu
      rustflags: -Crelocation-model=static
  i686-unknown-linux-gnu-SSE4_2:
    uses: ./.github/workflows/run-ci-script.yml
    strategy:
      fail-fast: false
    with:
      target: i686-unknown-linux-gnu
      rustflags: -Crelocation-model=static -Ctarget-feature=+sse4.2
  i686-unknown-linux-gnu-AVX2:
    uses: ./.github/workflows/run-ci-script.yml
    strategy:
      fail-fast: false
    with:
      target: i686-unknown-linux-gnu
      rustflags: -Crelocation-model=static -Ctarget-feature=+avx2
  x86_64-unknown-linux-gnu:
    uses: ./.github/workflows/run-ci-script.yml
    strategy:
      fail-fast: true
    with:
      target: x86_64-unknown-linux-gnu
  x86_64-unknown-linux-gnu-SSE4_2:
    uses: ./.github/workflows/run-ci-script.yml
    strategy:
      fail-fast: true
    with:
      target: x86_64-unknown-linux-gnu
      rustflags: -Ctarget-feature=+sse4.2
  x86_64-unknown-linux-gnu-AVX2:
    uses: ./.github/workflows/run-ci-script.yml
    strategy:
      fail-fast: true
    with:
      target: x86_64-unknown-linux-gnu
      rustflags: -Ctarget-feature=+avx2
  arm-unknown-linux-gnueabihf:
    uses: ./.github/workflows/run-ci-script.yml
    strategy:
      fail-fast: true
    with:
      target: arm-unknown-linux-gnueabihf
  armv7-unknown-linux-gnueabihf:
    uses: ./.github/workflows/run-ci-script.yml
    strategy:
      fail-fast: true
    with:
      target: armv7-unknown-linux-gnueabihf
  armv7-unknown-linux-gnueabihf-NEON:
    uses: ./.github/workflows/run-ci-script.yml
    strategy:
      fail-fast: true
    with:
      target: armv7-unknown-linux-gnueabihf
      rustflags: -Ctarget-feature=+neon
  thumbv7neon-unknown-linux-gnueabihf:
    uses: ./.github/workflows/run-ci-script.yml
    strategy:
      fail-fast: false
    with:
      target: thumbv7neon-unknown-linux-gnueabihf
  aarch64-unknown-linux-gnu-NEON:
    uses: ./.github/workflows/run-ci-script.yml
    strategy:
      fail-fast: true
    with:
      target: aarch64-unknown-linux-gnu
      rustflags: -Ctarget-feature=+neon
  powerpc-unknown-linux-gnu:
    uses: ./.github/workflows/run-ci-script.yml
    strategy:
      fail-fast: false
    with:
      target: powerpc-unknown-linux-gnu
  powerpc64-unknown-linux-gnu:
    uses: ./.github/workflows/run-ci-script.yml
    strategy:
      fail-fast: false
    with:
      target: powerpc64-unknown-linux-gnu
  powerpc64le-unknown-linux-gnu:
    uses: ./.github/workflows/run-ci-script.yml
    strategy:
      fail-fast: true
    with:
      target: powerpc64le-unknown-linux-gnu
  powerpc64le-unknown-linux-gnu-ALTIVEC:
    uses: ./.github/workflows/run-ci-script.yml
    strategy:
      fail-fast: true
    with:
      target: powerpc64le-unknown-linux-gnu
      rustflags: -Ctarget-feature=+altivec
  powerpc64le-unknown-linux-gnu-VSX:
    uses: ./.github/workflows/run-ci-script.yml
    strategy:
      fail-fast: true
    with:
      target: powerpc64le-unknown-linux-gnu
      rustflags: -Ctarget-feature=+vsx
  s390x-unknown-linux-gnu:
    uses: ./.github/workflows/run-ci-script.yml
    strategy:
      fail-fast: false
    with:
      target: s390x-unknown-linux-gnu
  sparc64-unknown-linux-gnu:
    uses: ./.github/workflows/run-ci-script.yml
    strategy:
      fail-fast: false
    with:
      target: sparc64-unknown-linux-gnu
  wasm32-unknown-unknown:
    uses: ./.github/workflows/run-ci-script.yml
    strategy:
      fail-fast: false
    with:
      target: wasm32-unknown-unknown
  x86_64-apple-darwin-SSE4_2:
    uses: ./.github/workflows/run-ci-script.yml
    strategy:
      fail-fast: true
    with:
      runner: macos-latest
      script: ci/run.sh
      target: x86_64-apple-darwin
      rustflags: -Ctarget-feature=+sse4.2
  x86_64-apple-darwin-AVX:
    uses: ./.github/workflows/run-ci-script.yml
    strategy:
      fail-fast: true
    with:
      runner: macos-latest
      script: ci/run.sh
      target: x86_64-apple-darwin
      rustflags: -Ctarget-feature=+avx
  x86_64-apple-ios:
    uses: ./.github/workflows/run-ci-script.yml
    strategy:
      fail-fast: true
    with:
      runner: macos-latest
      script: ci/run.sh
      target: x86_64-apple-ios
  aarch64-apple-ios:
    uses: ./.github/workflows/run-ci-script.yml
    strategy:
      fail-fast: true
    with:
      runner: macos-latest
      script: ci/run.sh
      target: aarch64-apple-ios
      rustflags: -Ctarget-feature=+neon


================================================
FILE: .github/workflows/docs.yml
================================================
name: docs

on:
  push:
    branches:
      - master

jobs:
  docs:
    uses: ./.github/workflows/run-ci-script.yml
    with:
      setup_script: cargo install mdbook
      script: ci/dox.sh


================================================
FILE: .github/workflows/run-ci-script.yml
================================================
name: run-ci-script

on:
  workflow_call:
    inputs:
      runner:
        required: false
        type: string
        default: ubuntu-latest
      target:
        required: false
        type: string
        default: ''
      rustflags:
        required: false
        type: string
        default: ''
      script:
        required: false
        type: string
        default: ci/run-docker.sh
      setup_script:
        required: false
        type: string
      norun:
        required: false
        type: string
        default: ''
      verify:
        required: false
        type: string
        default: ''
      features:
        required: false
        type: string
        default: ''

jobs:
  run-ci-script:
    runs-on: ${{ inputs.runner }}
    steps:
      - name: Checkout
        uses: actions/checkout@v2
      - name: Init Rustup Cache
        uses: actions/cache@v2
        with:
          path: |
            ~/.rustup/toolchains
          key: ${{ runner.os }}-cargo-${{ hashFiles('**/rust-toolchain') }}
      - name: Install Toolchain
        uses: dtolnay/rust-toolchain@nightly
        with:
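          # FIXME: change to nightly once https://github.com/rust-lang/packed_simd/pull/350 is merged.
          # NOTE(review): the action ref above is already `@nightly`, so this FIXME may be stale - confirm.
          # The toolchain needs to be kept in sync with the `rust-toolchain` files.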
          targets: ${{ inputs.target }}
          components: rustfmt
      - name: Generate Lockfile
        run: cargo generate-lockfile
      - name: Init Cargo Cache
        uses: actions/cache@v2
        with:
          path: |
            ~/.cargo/bin/
            ~/.cargo/registry/index/
            ~/.cargo/registry/cache/
            ~/.cargo/git/db/
            target/
          key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
      - name: Setup
        if: ${{ inputs.setup_script != '' }}
        run: ${{ inputs.setup_script }}
        env:
          TARGET: ${{ inputs.target }}
          RUSTFLAGS: ${{ inputs.rustflags }}
          NORUN: ${{ inputs.norun }}
          VERIFY: ${{ inputs.verify }}
          FEATURES: ${{ inputs.features }}
      - name: Run CI Script
        timeout-minutes: 30
        run: ${{ inputs.script }}
        env:
          TARGET: ${{ inputs.target }}
          RUSTFLAGS: ${{ inputs.rustflags }}
          NORUN: ${{ inputs.norun }}
          VERIFY: ${{ inputs.verify }}
          FEATURES: ${{ inputs.features }}


================================================
FILE: .gitignore
================================================
Cargo.lock
target/

# llvm-ir and assembly
*.ll
*.d

# png files output by benchmarks
*.png

# -*- mode: gitignore; -*-
*~
\#*\#
/.emacs.desktop
/.emacs.desktop.lock
*.elc
auto-save-list
tramp
.\#*

# Org-mode
.org-id-locations
*_archive

# flymake-mode
*_flymake.*

# eshell files
/eshell/history
/eshell/lastdir

# elpa packages
/elpa/

# reftex files
*.rel

# AUCTeX auto folder
/auto/

# cask packages
.cask/
dist/

# Flycheck
flycheck_*.el

# server auth directory
/server/

# projectile files
.projectile

# directory configuration
.dir-locals.el

================================================
FILE: .travis.yml
================================================
language: rust
rust: nightly
os: linux
dist: focal

stages:
  - tools
  - build-test-verify # Passes full test suite, permit no regressions (unless it's rustup :/)
  - 32bit-tier1
  - 64bit-tier2
  - 32bit-tier2

jobs:
  fast_finish: true
  include:
    # Android:
    - env: TARGET=x86_64-linux-android
      name: "x86_64-unknown-linux-android + SSE2"
      stage: build-test-verify
    - env: TARGET=arm-linux-androideabi
      name: "arm-linux-androideabi"
      stage: build-test-verify
    - name: "aarch64-unknown-linux-android + NEON"
      env: TARGET=aarch64-linux-android RUSTFLAGS="-C target-feature=+neon"
      stage: build-test-verify
    - env: TARGET="thumbv7neon-linux-androideabi"
      name: "thumbv7neon-linux-androideabi"
      stage: 32bit-tier2
    # Linux:
    - env: TARGET=i586-unknown-linux-gnu
      name: "i586-unknown-linux-gnu"
      stage: 32bit-tier2
    - env: TARGET=i586-unknown-linux-gnu RUSTFLAGS="-C target-feature=+sse"
      name: "i586-unknown-linux-gnu + SSE"
      stage: 32bit-tier2
    - env: TARGET=i586-unknown-linux-gnu RUSTFLAGS="-C target-feature=+sse2"
      name: "i586-unknown-linux-gnu + SSE2"
      stage: 32bit-tier2
    - env: TARGET=i686-unknown-linux-gnu
      name: "i686-unknown-linux-gnu + SSE2"
      stage: 32bit-tier1
    - env: TARGET=i686-unknown-linux-gnu RUSTFLAGS="-C target-feature=+sse4.2"
      name: "i686-unknown-linux-gnu + SSE4.2"
      stage: 32bit-tier1
    - env: TARGET=i686-unknown-linux-gnu RUSTFLAGS="-C target-feature=+avx2"
      name: "i686-unknown-linux-gnu + AVX2"
      stage: 32bit-tier1
    - env: TARGET=x86_64-unknown-linux-gnu RUSTFLAGS="-C target-feature=+sse4.2"
      name: "x86_64-unknown-linux-gnu + SSE4.2"
      stage: build-test-verify
    - env: TARGET=x86_64-unknown-linux-gnu RUSTFLAGS="-C target-feature=+avx2"
      name: "x86_64-unknown-linux-gnu + AVX2"
      stage: build-test-verify
    - env: TARGET=arm-unknown-linux-gnueabihf
      name: "arm-unknown-linux-gnueabihf"
      stage: build-test-verify
    - env: TARGET=armv7-unknown-linux-gnueabihf
      name: "armv7-unknown-linux-gnueabihf"
      stage: build-test-verify
    - env: TARGET=armv7-unknown-linux-gnueabihf RUSTFLAGS="-C target-feature=+neon"
      name: "armv7-unknown-linux-gnueabihf + NEON"
      stage: build-test-verify
    - env: TARGET="thumbv7neon-unknown-linux-gnueabihf"
      name: "thumbv7neon-unknown-linux-gnueabihf"
      stage: 32bit-tier2
    - name: "aarch64-unknown-linux-gnu + NEON"
      env: TARGET=aarch64-unknown-linux-gnu RUSTFLAGS="-C target-feature=+neon"
      stage: build-test-verify
    - env: TARGET=mips-unknown-linux-gnu
      name: "mips-unknown-linux-gnu"
      stage: 32bit-tier2
    - env: TARGET=mipsel-unknown-linux-musl
      name: "mipsel-unknown-linux-musl"
      stage: 32bit-tier2
    - env: TARGET=mips64-unknown-linux-gnuabi64
      name: "mips64-unknown-linux-gnuabi64"
      stage: 64bit-tier2
    - env: TARGET=mips64el-unknown-linux-gnuabi64
      name: "mips64el-unknown-linux-gnuabi64"
      stage: 64bit-tier2
      # FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/18
      # env: TARGET=mips64el-unknown-linux-gnuabi64 RUSTFLAGS="-C target-feature=+msa -C target-cpu=mips64r6"
    - env: TARGET=powerpc-unknown-linux-gnu
      name: "powerpc-unknown-linux-gnu"
      stage: 32bit-tier2
    - env: TARGET=powerpc64-unknown-linux-gnu
      name: "powerpc64-unknown-linux-gnu"
      stage: 64bit-tier2
    - name: "powerpc64le-unknown-linux-gnu"
      env: TARGET=powerpc64le-unknown-linux-gnu
      stage: build-test-verify
    - name: "powerpc64le-unknown-linux-gnu + ALTIVEC"
      env: TARGET=powerpc64le-unknown-linux-gnu RUSTFLAGS="-C target-feature=+altivec"
      stage: build-test-verify
    - name: "powerpc64le-unknown-linux-gnu + VSX"
      env: TARGET=powerpc64le-unknown-linux-gnu RUSTFLAGS="-C target-feature=+vsx"
      stage: build-test-verify
    - name: "s390x-unknown-linux-gnu"
      env: TARGET=s390x-unknown-linux-gnu
      stage: 64bit-tier2
    - env: TARGET=sparc64-unknown-linux-gnu
      name: "sparc64-unknown-linux-gnu"
      stage: 64bit-tier2
    # WebAssembly:
    - env: TARGET=wasm32-unknown-unknown
      name: "wasm32-unknown-unknown"
      stage: 32bit-tier2
    # MacOSX:
    - os: osx
      env: TARGET=x86_64-apple-darwin RUSTFLAGS="-C target-feature=+sse4.2"
      name: "x86_64-apple-darwin + SSE4.2"
      install: true
      script: ci/run.sh
      osx_image: xcode10
      stage: build-test-verify
      # Travis-CI OSX build bots do not support AVX2:
    - os: osx
      env: TARGET=x86_64-apple-darwin RUSTFLAGS="-C target-feature=+avx"
      name: "x86_64-apple-darwin + AVX"
      install: true
      script: ci/run.sh
      osx_image: xcode10
      stage: build-test-verify
    # *BSDs:
    #- env: TARGET=i686-unknown-freebsd NORUN=1
    #  script: ci/run.sh
    #- env: TARGET=x86_64-unknown-freebsd NORUN=1
    #  script: ci/run.sh
    #- env: TARGET=x86_64-unknown-netbsd NORUN=1
    #  script: ci/run.sh
    # Solaris:
    #- env: TARGET=x86_64-sun-solaris NORUN=1
    #  script: ci/run.sh
    # iOS:
    - os: osx
      env: TARGET=x86_64-apple-ios
      name: "x86_64-apple-ios + SSE2"
      script: ci/run.sh
      osx_image: xcode9.4
      stage: 64bit-tier2
    - name: "aarch64-apple-ios + NEON"
      env: TARGET=aarch64-apple-ios RUSTFLAGS="-C target-feature=+neon"
      os: osx
      osx_image: xcode9.4
      script: ci/run.sh
      stage: 64bit-tier2
    # BENCHMARKS:
    - name: "Benchmarks - x86_64-unknown-linux-gnu"
      install: TARGET=x86_64-unknown-linux-gnu ./ci/setup_benchmarks.sh
      # FIXME: Use `core_arch,sleef-sys` features once they work again
      script: PATH=$(pwd):$PATH NORUN=1 VERIFY=1 FEATURES=ispc ci/benchmark.sh
      stage: tools
    - name: "Benchmarks - x86_64-apple-darwin"
      install: TARGET=x86_64-apple-darwin ./ci/setup_benchmarks.sh
      # FIXME: Use `core_arch,sleef-sys` features once they work again
      script: PATH=$(pwd):$PATH NORUN=1 VERIFY=1 FEATURES=ispc ci/benchmark.sh
      os: osx
      osx_image: xcode9.4
      stage: tools
    # TOOLS:
    - name: "Documentation"
      before_install:
        - sudo add-apt-repository -y ppa:deadsnakes/ppa
        - sudo apt-get update -y
        - sudo apt-get install -y python3.9
      install:
        - cargo install mdbook
      script: ci/dox.sh
      stage: tools
    - name: "rustfmt"
      install: true
      script: |
        rustup toolchain install nightly -c rustfmt --allow-downgrade
        ci/all.sh check_fmt || true
      stage: tools

  allow_failures:
    # FIXME: ISPC cannot be found?
    - name: "Benchmarks - x86_64-apple-darwin"
    # FIXME: i686 fails in inlining, apparently
    - stage: 32bit-tier1
    #- env: TARGET=i686-unknown-freebsd NORUN=1
    #- env: TARGET=x86_64-unknown-freebsd NORUN=1
    #- env: TARGET=x86_64-unknown-netbsd NORUN=1
    #- env: TARGET=x86_64-sun-solaris NORUN=1

    # FIXME: TBD
    - stage: 64bit-tier2
    - stage: 32bit-tier2

    # FIXME: iOS
    # https://github.com/rust-lang-nursery/packed_simd/issues/26
    - env: TARGET=x86_64-apple-ios
    # Is this related to the above? Mysterious test failure
    - name: "aarch64-apple-ios + NEON"

install: travis_retry rustup target add $TARGET
before_script: cargo generate-lockfile
script: travis_wait 50 ci/run-docker.sh
after_script: sleep 5

env:
  global:
    secure: "lPHv7s6+AxQYNaFncycVFQt++Y1asQmMhOikQU1ztlP8CK7+hn2m98cg/euOJyzIOb2iJ3ZX4cGZkzw4lc59MQBByb1GtDbazQoUOzVDbVfe9BDD2f8JVoIFh1CMfjPKQ7Gg/rJqWlwrUlSd5GNxPCutKjY7qZhJuR6SQbJjlWaGN2Vd4fVCzKXz8fHRXgMEZS+d+CR4Nsrkb83J3Z4s5kSdJmhYxJ61AWjuzJVwUh4l3/HEYlSL5XXpuh5R2i7W16h1PlNdaTUgkZli1lHzO8+6Q8LzX9+XiLIEVX9lw3A2NdIKGz8E/+7Qs5oYOkwYhjROsDQxIK7xkSM30bQuN7cwMBybAVIyOPJkqXQ1dQyp83KSdsOj7JMyDDRvcEDLI6ehRlm5EcdH7YrReuboN81iUo0Sa7VsuUmgj5hjERCt9r30f9aWuitABai7vKRtjglg7Sp5CrEVPA4PQs6PqKCCRogoggbXJ/Z5Dyw/RZaXPeNR9+qIKN1Vjm9Gew1sRN2JK/3+vXTKtyJXH/uBxgJt4jQlbuShOJuF+BSfTF88sMe67a/357SSOIb4JkaCyd0flDCWYE8576kaHPlVVMT2peXee0LeRXm1e13nG3Na0t3LS/orJLPHOShNQGoDj7qAP5aEKggRya896JGwtvlaBHHTmSQh65G7cyNErZo="
branches:
  only:
    - staging # bors r+
    - trying  # bors try
    - master
notifications:
  email:
    on_success: never


================================================
FILE: Cargo.toml
================================================
[package]
name = "packed_simd"
version = "0.3.9"
description = "Portable Packed SIMD vectors"
documentation = "https://docs.rs/crate/packed_simd/"
homepage = "https://github.com/rust-lang/packed_simd"
repository = "https://github.com/rust-lang/packed_simd"
keywords = ["simd", "vector", "portability"]
categories = ["hardware-support", "concurrency", "no-std", "data-structures"]
license = "MIT OR Apache-2.0"
build = "build.rs"
edition = "2018"

[package.metadata.docs.rs]
features = ["into_bits"]
rustdoc-args = ["--cfg", "doc_cfg"]
# To build locally:
# RUSTDOCFLAGS="--cfg doc_cfg" cargo +nightly doc --features into_bits --no-deps --open

[badges]
is-it-maintained-issue-resolution = { repository = "rust-lang/packed_simd" }
is-it-maintained-open-issues = { repository = "rust-lang/packed_simd" }
maintenance = { status = "experimental" }

[dependencies]
cfg-if = "1.0.0"
core_arch = { version = "0.1.5", optional = true }
num-traits = { version = "0.2.14", default-features = false, features = ["libm"] }

[features]
default = []
# Enables the `FromBits`/`IntoBits` trait implementations for the vector
# types (disabled by default; see the "Cargo features" section of README.md).
into_bits = []
libcore_neon = []

[dev-dependencies]
paste = "^1"
arrayvec = { version = "^0.5", default-features = false }

[target.'cfg(target_arch = "x86_64")'.dependencies.sleef-sys]
version = "0.1.2"
optional = true

[target.wasm32-unknown-unknown.dev-dependencies]
# Keep in sync with the version on Dockerfile.
wasm-bindgen = "=0.2.87"
wasm-bindgen-test = "=0.3.37"


================================================
FILE: LICENSE-APACHE
================================================
                              Apache License
                        Version 2.0, January 2004
                     http://www.apache.org/licenses/

TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

1. Definitions.

   "License" shall mean the terms and conditions for use, reproduction,
   and distribution as defined by Sections 1 through 9 of this document.

   "Licensor" shall mean the copyright owner or entity authorized by
   the copyright owner that is granting the License.

   "Legal Entity" shall mean the union of the acting entity and all
   other entities that control, are controlled by, or are under common
   control with that entity. For the purposes of this definition,
   "control" means (i) the power, direct or indirect, to cause the
   direction or management of such entity, whether by contract or
   otherwise, or (ii) ownership of fifty percent (50%) or more of the
   outstanding shares, or (iii) beneficial ownership of such entity.

   "You" (or "Your") shall mean an individual or Legal Entity
   exercising permissions granted by this License.

   "Source" form shall mean the preferred form for making modifications,
   including but not limited to software source code, documentation
   source, and configuration files.

   "Object" form shall mean any form resulting from mechanical
   transformation or translation of a Source form, including but
   not limited to compiled object code, generated documentation,
   and conversions to other media types.

   "Work" shall mean the work of authorship, whether in Source or
   Object form, made available under the License, as indicated by a
   copyright notice that is included in or attached to the work
   (an example is provided in the Appendix below).

   "Derivative Works" shall mean any work, whether in Source or Object
   form, that is based on (or derived from) the Work and for which the
   editorial revisions, annotations, elaborations, or other modifications
   represent, as a whole, an original work of authorship. For the purposes
   of this License, Derivative Works shall not include works that remain
   separable from, or merely link (or bind by name) to the interfaces of,
   the Work and Derivative Works thereof.

   "Contribution" shall mean any work of authorship, including
   the original version of the Work and any modifications or additions
   to that Work or Derivative Works thereof, that is intentionally
   submitted to Licensor for inclusion in the Work by the copyright owner
   or by an individual or Legal Entity authorized to submit on behalf of
   the copyright owner. For the purposes of this definition, "submitted"
   means any form of electronic, verbal, or written communication sent
   to the Licensor or its representatives, including but not limited to
   communication on electronic mailing lists, source code control systems,
   and issue tracking systems that are managed by, or on behalf of, the
   Licensor for the purpose of discussing and improving the Work, but
   excluding communication that is conspicuously marked or otherwise
   designated in writing by the copyright owner as "Not a Contribution."

   "Contributor" shall mean Licensor and any individual or Legal Entity
   on behalf of whom a Contribution has been received by Licensor and
   subsequently incorporated within the Work.

2. Grant of Copyright License. Subject to the terms and conditions of
   this License, each Contributor hereby grants to You a perpetual,
   worldwide, non-exclusive, no-charge, royalty-free, irrevocable
   copyright license to reproduce, prepare Derivative Works of,
   publicly display, publicly perform, sublicense, and distribute the
   Work and such Derivative Works in Source or Object form.

3. Grant of Patent License. Subject to the terms and conditions of
   this License, each Contributor hereby grants to You a perpetual,
   worldwide, non-exclusive, no-charge, royalty-free, irrevocable
   (except as stated in this section) patent license to make, have made,
   use, offer to sell, sell, import, and otherwise transfer the Work,
   where such license applies only to those patent claims licensable
   by such Contributor that are necessarily infringed by their
   Contribution(s) alone or by combination of their Contribution(s)
   with the Work to which such Contribution(s) was submitted. If You
   institute patent litigation against any entity (including a
   cross-claim or counterclaim in a lawsuit) alleging that the Work
   or a Contribution incorporated within the Work constitutes direct
   or contributory patent infringement, then any patent licenses
   granted to You under this License for that Work shall terminate
   as of the date such litigation is filed.

4. Redistribution. You may reproduce and distribute copies of the
   Work or Derivative Works thereof in any medium, with or without
   modifications, and in Source or Object form, provided that You
   meet the following conditions:

   (a) You must give any other recipients of the Work or
       Derivative Works a copy of this License; and

   (b) You must cause any modified files to carry prominent notices
       stating that You changed the files; and

   (c) You must retain, in the Source form of any Derivative Works
       that You distribute, all copyright, patent, trademark, and
       attribution notices from the Source form of the Work,
       excluding those notices that do not pertain to any part of
       the Derivative Works; and

   (d) If the Work includes a "NOTICE" text file as part of its
       distribution, then any Derivative Works that You distribute must
       include a readable copy of the attribution notices contained
       within such NOTICE file, excluding those notices that do not
       pertain to any part of the Derivative Works, in at least one
       of the following places: within a NOTICE text file distributed
       as part of the Derivative Works; within the Source form or
       documentation, if provided along with the Derivative Works; or,
       within a display generated by the Derivative Works, if and
       wherever such third-party notices normally appear. The contents
       of the NOTICE file are for informational purposes only and
       do not modify the License. You may add Your own attribution
       notices within Derivative Works that You distribute, alongside
       or as an addendum to the NOTICE text from the Work, provided
       that such additional attribution notices cannot be construed
       as modifying the License.

   You may add Your own copyright statement to Your modifications and
   may provide additional or different license terms and conditions
   for use, reproduction, or distribution of Your modifications, or
   for any such Derivative Works as a whole, provided Your use,
   reproduction, and distribution of the Work otherwise complies with
   the conditions stated in this License.

5. Submission of Contributions. Unless You explicitly state otherwise,
   any Contribution intentionally submitted for inclusion in the Work
   by You to the Licensor shall be under the terms and conditions of
   this License, without any additional terms or conditions.
   Notwithstanding the above, nothing herein shall supersede or modify
   the terms of any separate license agreement you may have executed
   with Licensor regarding such Contributions.

6. Trademarks. This License does not grant permission to use the trade
   names, trademarks, service marks, or product names of the Licensor,
   except as required for reasonable and customary use in describing the
   origin of the Work and reproducing the content of the NOTICE file.

7. Disclaimer of Warranty. Unless required by applicable law or
   agreed to in writing, Licensor provides the Work (and each
   Contributor provides its Contributions) on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
   implied, including, without limitation, any warranties or conditions
   of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
   PARTICULAR PURPOSE. You are solely responsible for determining the
   appropriateness of using or redistributing the Work and assume any
   risks associated with Your exercise of permissions under this License.

8. Limitation of Liability. In no event and under no legal theory,
   whether in tort (including negligence), contract, or otherwise,
   unless required by applicable law (such as deliberate and grossly
   negligent acts) or agreed to in writing, shall any Contributor be
   liable to You for damages, including any direct, indirect, special,
   incidental, or consequential damages of any character arising as a
   result of this License or out of the use or inability to use the
   Work (including but not limited to damages for loss of goodwill,
   work stoppage, computer failure or malfunction, or any and all
   other commercial damages or losses), even if such Contributor
   has been advised of the possibility of such damages.

9. Accepting Warranty or Additional Liability. While redistributing
   the Work or Derivative Works thereof, You may choose to offer,
   and charge a fee for, acceptance of support, warranty, indemnity,
   or other liability obligations and/or rights consistent with this
   License. However, in accepting such obligations, You may act only
   on Your own behalf and on Your sole responsibility, not on behalf
   of any other Contributor, and only if You agree to indemnify,
   defend, and hold each Contributor harmless for any liability
   incurred by, or claims asserted against, such Contributor by reason
   of your accepting any such warranty or additional liability.

END OF TERMS AND CONDITIONS

APPENDIX: How to apply the Apache License to your work.

   To apply the Apache License to your work, attach the following
   boilerplate notice, with the fields enclosed by brackets "[]"
   replaced with your own identifying information. (Don't include
   the brackets!)  The text should be enclosed in the appropriate
   comment syntax for the file format. We also recommend that a
   file or class name and description of purpose be included on the
   same "printed page" as the copyright notice for easier
   identification within third-party archives.

Copyright [yyyy] [name of copyright owner]

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

	http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.


================================================
FILE: LICENSE-MIT
================================================
Copyright (c) 2014 The Rust Project Developers

Permission is hereby granted, free of charge, to any
person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the
Software without restriction, including without
limitation the rights to use, copy, modify, merge,
publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software
is furnished to do so, subject to the following
conditions:

The above copyright notice and this permission notice
shall be included in all copies or substantial portions
of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.


================================================
FILE: README.md
================================================
# `Simd<[T; N]>`

## Implementation of [Rust RFC #2366: `std::simd`][rfc2366]

[![Latest Version]][crates.io] [![docs]][master_docs]

**WARNING**: this crate only supports the most recent nightly Rust toolchain
and will be superseded by [`#![feature(portable_simd)]`](https://github.com/rust-lang/portable-simd).

## Documentation

* [API docs (`master` branch)][master_docs]
* [Performance guide][perf_guide]
* [API docs (`docs.rs`)][docs.rs]
* [RFC2366 `std::simd`][rfc2366]: contains motivation, design rationale,
  discussion, etc.

## Examples

Most of the examples come with both a scalar and a vectorized implementation.

* [`aobench`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/aobench)
* [`fannkuch_redux`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/fannkuch_redux)
* [`matrix inverse`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/matrix_inverse)
* [`mandelbrot`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/mandelbrot)
* [`n-body`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/nbody)
* [`options_pricing`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/options_pricing)
* [`spectral_norm`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/spectral_norm)
* [`triangle transform`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/triangle_xform)
* [`stencil`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/stencil)
* [`vector dot product`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/dot_product)

## Cargo features

* `into_bits` (default: disabled): enables `FromBits`/`IntoBits` trait
  implementations for the vector types. These allow reinterpreting the bits of a
  vector type as those of another vector type safely by just using the
  `.into_bits()` method.

## Performance

The following [ISPC] examples are also part of `packed_simd`'s
[`examples/`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/)
directory, where `packed_simd`+[`rayon`][rayon] are used to emulate [ISPC]'s
Single-Program-Multiple-Data (SPMD) programming model. The performance results
on different hardware are shown in the `readme.md` of each example. The following
table summarizes the performance ranges, where `+` means speed-up and `-`
slowdown:

* `aobench`: `[-1.02x, +1.53x]`,
* `stencil`: `[+1.06x, +1.72x]`,
* `mandelbrot`: `[-1.74x, +1.2x]`,
* `options_pricing`:
   * `black_scholes`: `+1.0x`
   * `binomial_put`: `+1.4x`

 While SPMD is not the intended use case for `packed_simd`, it is possible to
 combine the library with [`rayon`][rayon] to poorly emulate [ISPC]'s SPMD programming
 model in Rust. Writing performant code is not as straightforward as with
 [ISPC], but with some care (e.g. see the [Performance Guide][perf_guide]) one
 can easily match and often out-perform [ISPC]'s "default performance".

## Platform support

The following table describes the supported platforms: `build` shows whether
the library compiles without issues for a given target, while `run` shows
whether the test suite passes for a given target.

| **Linux**                             | **build** | **run** |
|---------------------------------------|-----------|---------|
| `i586-unknown-linux-gnu`              | ✓         | ✗       |
| `i686-unknown-linux-gnu`              | ✓         | ✗       |
| `x86_64-unknown-linux-gnu`            | ✓         | ✓       |
| `arm-unknown-linux-gnueabihf`         | ✓         | ✓       |
| `armv7-unknown-linux-gnueabi`         | ✓         | ✓       |
| `aarch64-unknown-linux-gnu`           | ✓         | ✓       |
| `powerpc-unknown-linux-gnu`           | ✓         | ✗       |
| `powerpc64-unknown-linux-gnu`         | ✓         | ✗       |
| `powerpc64le-unknown-linux-gnu`       | ✓         | ✓       |
| `s390x-unknown-linux-gnu`             | ✓         | ✗       |
| `sparc64-unknown-linux-gnu`           | ✓         | ✗       |
| `thumbv7neon-unknown-linux-gnueabihf` | ✓         | ✓       |
| **MacOSX**                            | **build** | **run** |
| `x86_64-apple-darwin`                 | ✓         | ✓       |
| **Android**                           | **build** | **run** |
| `x86_64-linux-android`                | ✓         | ✓       |
| `armv7-linux-androideabi`             | ✓         | ✗       |
| `aarch64-linux-android`               | ✓         | ✗       |
| `thumbv7neon-linux-androideabi`       | ✓         | ✗       |
| **iOS**                               | **build** | **run** |
| `x86_64-apple-ios`                    | ✗         | ✗       |
| `aarch64-apple-ios`                   | ✗         | ✗       |


## Machine code verification

The
[`verify/`](https://github.com/rust-lang-nursery/packed_simd/tree/master/verify)
crate disassembles the portable packed vector APIs at run-time and
compares the generated machine code against the desired one to make sure that
this crate remains efficient.

## License

This project is licensed under either of

* [Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0)
  ([LICENSE-APACHE](LICENSE-APACHE))

* [MIT License](http://opensource.org/licenses/MIT)
  ([LICENSE-MIT](LICENSE-MIT))

at your option.

## Contributing

We welcome all people who want to contribute.
Please see the [contributing instructions] for more information.

Contributions in any form (issues, pull requests, etc.) to this project
must adhere to Rust's [Code of Conduct].

Unless you explicitly state otherwise, any contribution intentionally submitted
for inclusion in `packed_simd` by you, as defined in the Apache-2.0 license, shall be
dual licensed as above, without any additional terms or conditions.

[travis]: https://travis-ci.com/rust-lang/packed_simd
[Travis-CI Status]: https://travis-ci.com/rust-lang/packed_simd.svg?branch=master
[appveyor]: https://ci.appveyor.com/project/gnzlbg/packed-simd
[Appveyor Status]: https://ci.appveyor.com/api/projects/status/hd7v9dvr442hgdix?svg=true
[Latest Version]: https://img.shields.io/crates/v/packed_simd.svg
[crates.io]: https://crates.io/crates/packed_simd
[docs]: https://docs.rs/packed_simd/badge.svg
[docs.rs]: https://docs.rs/packed_simd
[master_docs]: https://rust-lang-nursery.github.io/packed_simd/packed_simd/
[perf_guide]: https://rust-lang-nursery.github.io/packed_simd/perf-guide/
[rfc2366]: https://github.com/rust-lang/rfcs/pull/2366
[ISPC]: https://ispc.github.io/
[rayon]: https://crates.io/crates/rayon
[boost_license]: https://www.boost.org/LICENSE_1_0.txt
[SLEEF]: https://sleef.org/
[sleef_sys]: https://crates.io/crates/sleef-sys
[contributing instructions]: contributing.md
[Code of Conduct]: https://www.rust-lang.org/en-US/conduct.html


================================================
FILE: bors.toml
================================================
status = [
    "continuous-integration/travis-ci/push"
]

================================================
FILE: build.rs
================================================
// Build script entry point.
//
// Reads the `TARGET` environment variable and, when the target triple
// mentions "neon", asks cargo to compile the crate with `--cfg libcore_neon`.
// Panics if `TARGET` is not set.
fn main() {
    let triple = std::env::var("TARGET").expect("TARGET environment variable not defined");
    let targets_neon = triple.contains("neon");
    if targets_neon {
        println!("cargo:rustc-cfg=libcore_neon");
    }
}


================================================
FILE: ci/all.sh
================================================
#!/usr/bin/env bash
#
# Performs an operation on all targets
#
# Usage: all.sh <operation>
#
# The operation is matched against the patterns `clean*`, `check_fmt*`,
# `fmt*` and `clippy` (in that order). The selected cargo command is then run
# in the current directory, in every directory under examples/, in
# verify/verify and in micro_benchmarks.

set -ex

: "${1?The all.sh script requires one argument.}"

op=$1

# One thin wrapper per supported operation.
cargo_clean() { cargo clean; }
cargo_check_fmt() { cargo fmt --all -- --check; }
cargo_fmt() { cargo fmt --all; }
cargo_clippy() { cargo clippy --all -- -D clippy::perf; }

# Map the requested operation onto its wrapper; anything else is an error.
case $op in
    clean*)     CMD=cargo_clean ;;
    check_fmt*) CMD=cargo_check_fmt ;;
    fmt*)       CMD=cargo_fmt ;;
    clippy)     CMD=cargo_clippy ;;
    *)
        echo "Unknown operation: \"${op}\""
        exit 1
        ;;
esac

echo "Operation is: ${CMD}"

# Runs the selected operation inside directory $1. The subshell keeps the
# directory change from leaking into the rest of the script.
run_in() {
    (
        cd "$1"
        $CMD
    )
}

# On src/
$CMD

# Check examples/
for example in examples/*/
do
    run_in "${example%/}"
done

run_in verify/verify
run_in micro_benchmarks


================================================
FILE: ci/android-install-ndk.sh
================================================
#!/usr/bin/env sh
# Copyright 2016 The Rust Project Developers. See the COPYRIGHT
# file at the top-level directory of this distribution and at
# http://rust-lang.org/COPYRIGHT.
#
# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
# option. This file may not be copied, modified, or distributed
# except according to those terms.

set -ex

# Downloads the Android NDK archive into the current directory and unpacks it
# as ./ndk.
ANDROID_NDK_URL=https://dl.google.com/android/repository
ANDROID_NDK_ARCHIVE=android-ndk-r25b-linux.zip

# --retry guards against transient network failures (the other Android CI
# download scripts already retry); -f makes an HTTP error fail the script
# instead of saving the error page as the archive.
curl --retry 5 -fO "$ANDROID_NDK_URL/$ANDROID_NDK_ARCHIVE"
unzip -q "$ANDROID_NDK_ARCHIVE"
rm "$ANDROID_NDK_ARCHIVE"
# Give the unpacked android-ndk-* directory a stable name.
mv android-ndk-* ndk
rm -rf android-ndk-*


================================================
FILE: ci/android-install-sdk.sh
================================================
#!/usr/bin/env sh
# Copyright 2016 The Rust Project Developers. See the COPYRIGHT
# file at the top-level directory of this distribution and at
# http://rust-lang.org/COPYRIGHT.
#
# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
# option. This file may not be copied, modified, or distributed
# except according to those terms.

set -ex

# Usage: android-install-sdk.sh <arch>
#
# <arch> is one of: arm, armv7, aarch64, i686, x86_64. It selects the ABI of
# the system image to install and is also used as the name of the created AVD.

# Translate the architecture into the Android ABI name first, so that an
# invalid argument is rejected before anything is downloaded.
case "$1" in
  arm | armv7)
    abi=armeabi-v7a
    ;;

  aarch64)
    abi=arm64-v8a
    ;;

  i686)
    abi=x86
    ;;

  x86_64)
    abi=x86_64
    ;;

  *)
    echo "invalid arch: $1"
    exit 1
    ;;
esac

# Prep the SDK and emulator
#
# Note that the update process requires that we accept a bunch of licenses, and
# we can't just pipe `yes` into it for some reason, so we take the same strategy
# located in https://github.com/appunite/docker by just wrapping it in a script
# which apparently magically accepts the licenses.

mkdir sdk
# -f: fail on HTTP errors instead of saving the error page as the zip file.
curl --retry 5 -f https://dl.google.com/android/repository/sdk-tools-linux-3859397.zip -O
unzip -d sdk sdk-tools-linux-3859397.zip

# --no_https avoids
     # javax.net.ssl.SSLHandshakeException: sun.security.validator.ValidatorException: No trusted certificate found
yes | ./sdk/tools/bin/sdkmanager --licenses --no_https
yes | ./sdk/tools/bin/sdkmanager --no_https \
        "emulator" \
        "platform-tools" \
        "platforms;android-24" \
        "system-images;android-24;default;$abi"

# Create an AVD named after the requested architecture; "no" is piped in as
# the answer to avdmanager's interactive prompt.
echo "no" |
    ./sdk/tools/bin/avdmanager create avd \
        --name "${1}" \
        --package "system-images;android-24;default;$abi"


================================================
FILE: ci/android-sysimage.sh
================================================
#!/usr/bin/env bash

# Copyright 2017 The Rust Project Developers. See the COPYRIGHT
# file at the top-level directory of this distribution and at
# http://rust-lang.org/COPYRIGHT.
#
# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
# option. This file may not be copied, modified, or distributed
# except according to those terms.

set -ex

URL=https://dl.google.com/android/repository/sys-img/android

# Installs the Android dynamic linker and core C libraries under /system.
#
# $1 - architecture; "x86_64" and "arm64" select the 64-bit linker and lib64
#      libraries, anything else selects the 32-bit ones.
# $2 - file name of the system-image archive to download from ${URL}.
main() {
    local arch="${1}"
    local name="${2}"
    local dest=/system
    local td
    td="$(mktemp -d)"

    # -y: never wait for confirmation (the matching purge below already
    # passes -y).
    apt-get install -y --no-install-recommends e2tools

    pushd "${td}"
    # -f: fail on HTTP errors instead of saving the error page as the archive.
    curl --retry 5 -f -O "${URL}/${name}"
    unzip -q "${name}"

    local system
    system="$(find . -name system.img)"
    mkdir -p ${dest}/{bin,lib,lib64}

    # Extract android linker and libraries to /system
    # This allows android executables to be run directly (or with qemu)
    if [ "${arch}" = "x86_64" ] || [ "${arch}" = "arm64" ]; then
        e2cp -p "${system}:/bin/linker64" "${dest}/bin/"
        e2cp -p "${system}:/lib64/libdl.so" "${dest}/lib64/"
        e2cp -p "${system}:/lib64/libc.so" "${dest}/lib64/"
        e2cp -p "${system}:/lib64/libm.so" "${dest}/lib64/"
    else
        e2cp -p "${system}:/bin/linker" "${dest}/bin/"
        e2cp -p "${system}:/lib/libdl.so" "${dest}/lib/"
        e2cp -p "${system}:/lib/libc.so" "${dest}/lib/"
        e2cp -p "${system}:/lib/libm.so" "${dest}/lib/"
    fi

    # clean up
    apt-get purge --auto-remove -y e2tools

    popd

    rm -rf "${td}"
}

main "${@}"


================================================
FILE: ci/benchmark.sh
================================================
#!/usr/bin/env bash
#
# Runs all benchmarks. Controlled by the following environment variables:
#
# FEATURES={} - cargo features to pass to all benchmarks (e.g. core_arch,sleef-sys,ispc)
# NORUN={1}   - only builds the benchmarks

set -ex

if [[ ${NORUN} != 1 ]]; then
    # Most benchmarks require hyperfine; require it upfront.
    hash hyperfine 2>/dev/null || { echo >&2 "hyperfine is not in PATH."; exit 1; }
fi


# If the ispc benchmark feature is enabled, ispc must be in the path of the
# benchmarks.
if echo "$FEATURES" | grep -q "ispc"; then
    hash ispc 2>/dev/null || { echo >&2 "ispc is not in PATH."; exit 1; }
fi

# An example with a benchmark.sh is a benchmark:
for dir in examples/*/
do
    # Strip the trailing slash left by the glob.
    dir=${dir%/}
    # Quoted so that a directory name containing whitespace or glob characters
    # is passed to `cd` as a single argument.
    cd "${dir}"
    if [ -f "benchmark.sh" ]; then
        ./benchmark.sh
    fi
    cd -
done



================================================
FILE: ci/deploy_and_run_on_ios_simulator.rs
================================================
// Copyright 2017 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

// This is a script to deploy and execute a binary on an iOS simulator.
// The primary use of this is to be able to run unit tests on the simulator and
// retrieve the results.
//
// To do this through Cargo instead, use Dinghy
// (https://github.com/snipsco/dinghy): cargo dinghy install, then cargo dinghy
// test.

use std::env;
use std::fs::{self, File};
use std::io::Write;
use std::path::Path;
use std::process;
use std::process::Command;

// Unwraps a `Result`: evaluates to the `Ok` value, or panics with the
// stringified expression followed by the error.
macro_rules! t {
    ($e:expr) => {
        match $e {
            Ok(value) => value,
            Err(err) => panic!("{} failed with: {}", stringify!($e), err),
        }
    };
}

// Step one: Wrap as an app
//
// Recreates the `ios_simulator_app` directory (relative to the current
// working directory), copies the test binary into it under the name
// `crate_name`, and writes an `Info.plist` that names that binary as the
// bundle executable with the bundle identifier `com.rust.unittests`.
//
// Panics (via `t!`) if creating the directory, copying the binary, or
// creating/writing the plist fails.
fn package_as_simulator_app(crate_name: &str, test_binary_path: &Path) {
    println!("Packaging simulator app");
    // Remove any previous bundle; the result is deliberately discarded, so a
    // failure here (e.g. the directory not existing yet) is not an error.
    drop(fs::remove_dir_all("ios_simulator_app"));
    t!(fs::create_dir("ios_simulator_app"));
    t!(fs::copy(test_binary_path,
                Path::new("ios_simulator_app").join(crate_name)));

    // `{}` in the plist template below is replaced by `crate_name`.
    let mut f = t!(File::create("ios_simulator_app/Info.plist"));
    t!(f.write_all(format!(r#"
        <?xml version="1.0" encoding="UTF-8"?>
        <!DOCTYPE plist PUBLIC
                "-//Apple//DTD PLIST 1.0//EN"
                "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
        <plist version="1.0">
            <dict>
                <key>CFBundleExecutable</key>
                <string>{}</string>
                <key>CFBundleIdentifier</key>
                <string>com.rust.unittests</string>
            </dict>
        </plist>
    "#, crate_name).as_bytes()));
}

// Step two: Start the iOS simulator
//
// Looks for a simulator named `rust_ios` in the output of `xcrun simctl list`.
// It is created when missing and shut down when already booted; afterwards it
// is erased and booted. Panics if more than one listed line mentions
// `rust_ios` or if any `xcrun` invocation fails.
fn start_simulator() {
    println!("Looking for iOS simulator");
    let output = t!(Command::new("xcrun").arg("simctl").arg("list").output());
    assert!(output.status.success());
    let stdout = t!(String::from_utf8(output.stdout));

    // `None`: no rust_ios simulator listed; `Some(booted)`: exactly one
    // listed, with its boot state.
    let mut rust_sim: Option<bool> = None;
    for entry in stdout.lines().filter(|l| l.contains("rust_ios")) {
        if rust_sim.is_some() {
            panic!("Duplicate rust_ios simulators found. Please \
                    double-check xcrun simctl list.");
        }
        rust_sim = Some(entry.contains("(Booted)"));
    }

    match rust_sim {
        None => {
            println!("Creating iOS simulator");
            Command::new("xcrun")
                .args(&[
                    "simctl",
                    "create",
                    "rust_ios",
                    "com.apple.CoreSimulator.SimDeviceType.iPhone-SE",
                    "com.apple.CoreSimulator.SimRuntime.iOS-10-2",
                ])
                .check_status();
        }
        Some(true) => {
            println!("Shutting down already-booted simulator");
            Command::new("xcrun")
                .args(&["simctl", "shutdown", "rust_ios"])
                .check_status();
        }
        Some(false) => {}
    }

    println!("Starting iOS simulator");
    // We can't uninstall the app (if present) as that will hang if the
    // simulator isn't completely booted; just erase the simulator instead.
    for action in &["erase", "boot"] {
        Command::new("xcrun")
            .arg("simctl")
            .arg(action)
            .arg("rust_ios")
            .check_status();
    }
}

// Step three: Install the app
fn install_app_to_simulator() {
    println!("Installing app to simulator");
    Command::new("xcrun")
            .arg("simctl")
            .arg("install")
            .arg("booted")
            .arg("ios_simulator_app/")
            .check_status();
}

// Step four: Run the app
//
// Launches the installed app on the booted simulator with its console
// attached, echoes the captured stdout/stderr, shuts the simulator down and
// finally panics unless the output contains a `test result: ok` line and no
// line containing `FAILED`.
fn run_app_on_simulator() {
    println!("Running app");
    let output = t!(Command::new("xcrun")
                    .arg("simctl")
                    .arg("launch")
                    .arg("--console")
                    .arg("booted")
                    .arg("com.rust.unittests")
                    .output());

    // Decode stdout once; it is both printed and scanned for the verdict.
    let stdout = String::from_utf8_lossy(&output.stdout);
    println!("stdout --\n{}\n", stdout);
    println!("stderr --\n{}\n", String::from_utf8_lossy(&output.stderr));

    let failed = stdout.lines().any(|l| l.contains("FAILED"));
    let passed = stdout.lines().any(|l| l.contains("test result: ok"));

    // Shut the simulator down before reporting, so a failing run does not
    // leave it booted.
    println!("Shutting down simulator");
    Command::new("xcrun")
        .arg("simctl")
        .arg("shutdown")
        .arg("rust_ios")
        .check_status();
    if failed || !passed {
        panic!("tests didn't pass");
    }
}

/// Extension trait for running a command and insisting that it succeeds.
trait CheckStatus {
    /// Runs the command and panics unless it exits successfully.
    fn check_status(&mut self);
}

impl CheckStatus for Command {
    fn check_status(&mut self) {
        // Echo the command first so the log shows what is being executed.
        println!("\trunning: {:?}", self);
        // `t!` is defined elsewhere in this file; presumably it unwraps the
        // `io::Result` and panics on error — confirm against its definition.
        assert!(t!(self.status()).success());
    }
}

// Entry point: expects exactly one argument, the path of the test executable,
// and drives the four steps (package, start simulator, install, run).
fn main() {
    let argv: Vec<String> = env::args().collect();
    let test_binary_path = match argv.as_slice() {
        [_, executable] => Path::new(executable),
        _ => {
            println!("Usage: {} <executable>", argv[0]);
            process::exit(-1);
        }
    };

    // The app is named after the file name of the test binary.
    let crate_name = test_binary_path.file_name().unwrap().to_str().unwrap();

    package_as_simulator_app(crate_name, test_binary_path);
    start_simulator();
    install_app_to_simulator();
    run_app_on_simulator();
}


================================================
FILE: ci/docker/aarch64-linux-android/Dockerfile
================================================
# Build/test image for the aarch64 Android target. Tests are pushed to an
# Android emulator that the entrypoint starts in the background.
FROM ubuntu:16.04

# Host tooling; the i386 architecture is enabled for the 32-bit libstdc++.
RUN dpkg --add-architecture i386 && \
    apt-get update && \
    apt-get install -y --no-install-recommends \
  file \
  make \
  curl \
  ca-certificates \
  python \
  unzip \
  expect \
  openjdk-9-jre \
  libstdc++6:i386 \
  libpulse0 \
  gcc \
  libc6-dev

WORKDIR /android/
# Copies every file in the build context whose name starts with `android`
# (the NDK/SDK install scripts).
COPY android* /android/

ENV ANDROID_ARCH=aarch64
ENV PATH=$PATH:/android/ndk-$ANDROID_ARCH/bin:/android/sdk/tools:/android/sdk/platform-tools

RUN sh /android/android-install-ndk.sh $ANDROID_ARCH
RUN sh /android/android-install-sdk.sh $ANDROID_ARCH
# HOME is set to /tmp below, so the emulator configuration is moved there and
# made accessible to whichever user the container runs as.
RUN mv /root/.android /tmp
RUN chmod 777 -R /tmp/.android
RUN chmod 755 /android/sdk/tools/* /android/sdk/emulator/qemu/linux-x86_64/*

# Cargo links with the NDK GCC and runs test binaries through /tmp/runtest,
# which the entrypoint compiles from runtest.rs.
ENV PATH=$PATH:/rust/bin \
    CARGO_TARGET_AARCH64_LINUX_ANDROID_LINKER=aarch64-linux-android-gcc \
    CARGO_TARGET_AARCH64_LINUX_ANDROID_RUNNER=/tmp/runtest \
    OBJDUMP=aarch64-linux-android-objdump \
    HOME=/tmp

ADD runtest-android.rs /tmp/runtest.rs
# The entrypoint starts the emulator in the background, compiles the test
# runner, then exec's the command given to `docker run`. The trailing "--"
# becomes $0 of the `bash -c` script so that "$@" is exactly that command.
ENTRYPOINT [ \
  "bash", \
  "-c", \
  # set SHELL so android can detect a 64bits system, see
  # http://stackoverflow.com/a/41789144
  "SHELL=/bin/dash /android/sdk/emulator/emulator @aarch64 -no-window & \
   rustc /tmp/runtest.rs -o /tmp/runtest && \
   exec \"$@\"", \
  "--" \
]


================================================
FILE: ci/docker/aarch64-unknown-linux-gnu/Dockerfile
================================================
FROM ubuntu:18.04

# Native and aarch64 cross toolchains plus QEMU user-mode emulation.
RUN apt-get update && apt-get install -y --no-install-recommends \
  ca-certificates \
  file \
  gcc \
  gcc-aarch64-linux-gnu \
  libc6-dev \
  libc6-dev-arm64-cross \
  make \
  qemu-user

# Link with the cross GCC and run test binaries under qemu-aarch64.
ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc \
    CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER="qemu-aarch64 -L /usr/aarch64-linux-gnu" \
    OBJDUMP=aarch64-linux-gnu-objdump


================================================
FILE: ci/docker/arm-unknown-linux-gnueabi/Dockerfile
================================================
FROM ubuntu:18.04

# Native and soft-float ARM cross toolchains plus QEMU user-mode emulation.
RUN apt-get update && apt-get install -y --no-install-recommends \
  binutils-arm-linux-gnueabi \
  ca-certificates \
  file \
  gcc \
  gcc-arm-linux-gnueabi \
  libc6-armel-cross \
  libc6-dev \
  libc6-dev-armel-cross \
  make \
  qemu-user

# Link with the cross GCC and run test binaries under qemu-arm.
ENV CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABI_LINKER=arm-linux-gnueabi-gcc \
    CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABI_RUNNER="qemu-arm -L /usr/arm-linux-gnueabi" \
    OBJDUMP=arm-linux-gnueabi-objdump


================================================
FILE: ci/docker/arm-unknown-linux-gnueabihf/Dockerfile
================================================
FROM ubuntu:18.04

# Native and hard-float ARM cross toolchains plus QEMU user-mode emulation.
RUN apt-get update && apt-get install -y --no-install-recommends \
  ca-certificates \
  file \
  gcc \
  gcc-arm-linux-gnueabihf \
  libc6-dev \
  libc6-dev-armhf-cross \
  make \
  qemu-user

# Link with the cross GCC and run test binaries under qemu-arm.
ENV CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc \
    CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABIHF_RUNNER="qemu-arm -L /usr/arm-linux-gnueabihf" \
    OBJDUMP=arm-linux-gnueabihf-objdump


================================================
FILE: ci/docker/armv7-linux-androideabi/Dockerfile
================================================
# Build/test image for the armv7 Android target. Tests are pushed to an
# Android emulator that the entrypoint starts in the background.
FROM ubuntu:16.04

# Host tooling; the i386 architecture is enabled for the 32-bit libstdc++.
RUN dpkg --add-architecture i386 && \
    apt-get update && \
    apt-get install -y --no-install-recommends \
  file \
  make \
  curl \
  ca-certificates \
  python \
  unzip \
  expect \
  openjdk-9-jre \
  libstdc++6:i386 \
  libpulse0 \
  gcc \
  libc6-dev

WORKDIR /android/
COPY android* /android/

ENV ANDROID_ARCH=arm
ENV PATH=$PATH:/android/ndk-$ANDROID_ARCH/bin:/android/sdk/tools:/android/sdk/platform-tools

RUN sh /android/android-install-ndk.sh $ANDROID_ARCH
RUN sh /android/android-install-sdk.sh $ANDROID_ARCH
# HOME is set to /tmp below, so the emulator configuration is moved there.
RUN mv /root/.android /tmp
RUN chmod 777 -R /tmp/.android
RUN chmod 755 /android/sdk/tools/* /android/sdk/emulator/qemu/linux-x86_64/*

# Cargo derives the variable name from the target triple, so a build for
# `armv7-linux-androideabi` reads the ARMV7 variants. The ARM variants are
# kept so that `arm-linux-androideabi` builds in this image keep working.
ENV PATH=$PATH:/rust/bin \
    CARGO_TARGET_ARMV7_LINUX_ANDROIDEABI_LINKER=arm-linux-androideabi-gcc \
    CARGO_TARGET_ARMV7_LINUX_ANDROIDEABI_RUNNER=/tmp/runtest \
    CARGO_TARGET_ARM_LINUX_ANDROIDEABI_LINKER=arm-linux-androideabi-gcc \
    CARGO_TARGET_ARM_LINUX_ANDROIDEABI_RUNNER=/tmp/runtest \
    OBJDUMP=arm-linux-androideabi-objdump \
    HOME=/tmp

ADD runtest-android.rs /tmp/runtest.rs
# The entrypoint starts the emulator in the background, compiles the test
# runner, then exec's the command given to `docker run`. The trailing "--"
# becomes $0 of the `bash -c` script so that "$@" is exactly that command.
ENTRYPOINT [ \
  "bash", \
  "-c", \
  # set SHELL so android can detect a 64bits system, see
  # http://stackoverflow.com/a/41789144
  "SHELL=/bin/dash /android/sdk/emulator/emulator @arm -no-window & \
   rustc /tmp/runtest.rs -o /tmp/runtest && \
   exec \"$@\"", \
  "--" \
]


================================================
FILE: ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile
================================================
FROM ubuntu:18.04

# Native and hard-float ARM cross toolchains plus QEMU user-mode emulation.
RUN apt-get update && apt-get install -y --no-install-recommends \
  ca-certificates \
  file \
  gcc \
  gcc-arm-linux-gnueabihf \
  libc6-dev \
  libc6-dev-armhf-cross \
  make \
  qemu-user

# Link with the cross GCC and run test binaries under qemu-arm.
ENV CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc \
    CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_RUNNER="qemu-arm -L /usr/arm-linux-gnueabihf" \
    OBJDUMP=arm-linux-gnueabihf-objdump


================================================
FILE: ci/docker/i586-unknown-linux-gnu/Dockerfile
================================================
FROM ubuntu:18.04

# Multilib GCC so that 32-bit x86 binaries can be linked on the 64-bit image.
RUN apt-get update && apt-get install -y --no-install-recommends \
  ca-certificates \
  file \
  gcc-multilib \
  libc6-dev \
  make


================================================
FILE: ci/docker/i686-unknown-linux-gnu/Dockerfile
================================================
FROM ubuntu:18.04

# Multilib GCC so that 32-bit x86 binaries can be linked on the 64-bit image.
RUN apt-get update && apt-get install -y --no-install-recommends \
  ca-certificates \
  file \
  gcc-multilib \
  libc6-dev \
  make


================================================
FILE: ci/docker/mips-unknown-linux-gnu/Dockerfile
================================================
FROM ubuntu:18.04

# Native and MIPS cross toolchains plus QEMU (user-mode and system).
# `qemu-user` used to be listed twice; it is now requested once.
RUN apt-get update && apt-get install -y --no-install-recommends \
        gcc libc6-dev qemu-user ca-certificates \
        gcc-mips-linux-gnu libc6-dev-mips-cross \
        qemu-system-mips \
        make \
        file

# Link with the cross GCC and run test binaries under qemu-mips.
ENV CARGO_TARGET_MIPS_UNKNOWN_LINUX_GNU_LINKER=mips-linux-gnu-gcc \
    CARGO_TARGET_MIPS_UNKNOWN_LINUX_GNU_RUNNER="qemu-mips -L /usr/mips-linux-gnu" \
    OBJDUMP=mips-linux-gnu-objdump

================================================
FILE: ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile
================================================
FROM ubuntu:18.04

# Native and MIPS64 cross toolchains plus QEMU (user-mode and system).
# `qemu-user` used to be listed twice; it is now requested once.
RUN apt-get update && apt-get install -y --no-install-recommends \
        gcc libc6-dev qemu-user ca-certificates \
        gcc-mips64-linux-gnuabi64 libc6-dev-mips64-cross \
        qemu-system-mips64

# Link with the cross GCC and run test binaries under qemu-mips64.
ENV CARGO_TARGET_MIPS64_UNKNOWN_LINUX_GNUABI64_LINKER=mips64-linux-gnuabi64-gcc \
    CARGO_TARGET_MIPS64_UNKNOWN_LINUX_GNUABI64_RUNNER="qemu-mips64 -L /usr/mips64-linux-gnuabi64" \
    OBJDUMP=mips64-linux-gnuabi64-objdump

================================================
FILE: ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile
================================================
FROM ubuntu:18.04

# Native and little-endian MIPS64 cross toolchains plus QEMU.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        gcc \
        gcc-mips64el-linux-gnuabi64 \
        libc6-dev \
        libc6-dev-mips64el-cross \
        qemu-system-mips64el \
        qemu-user

# Link with the cross GCC and run test binaries under qemu-mips64el.
ENV CARGO_TARGET_MIPS64EL_UNKNOWN_LINUX_GNUABI64_LINKER=mips64el-linux-gnuabi64-gcc \
    CARGO_TARGET_MIPS64EL_UNKNOWN_LINUX_GNUABI64_RUNNER="qemu-mips64el -L /usr/mips64el-linux-gnuabi64" \
    OBJDUMP=mips64el-linux-gnuabi64-objdump

================================================
FILE: ci/docker/mipsel-unknown-linux-musl/Dockerfile
================================================
# ubuntu:18.10 is end-of-life and its package index is no longer served from
# the default mirrors, so `apt-get update` fails there. Use the 18.04 LTS
# base that the other MIPS images already build on.
FROM ubuntu:18.04

RUN apt-get update && \
    apt-get install -y --no-install-recommends \
    ca-certificates \
    gcc \
    libc6-dev \
    make \
    qemu-user \
    qemu-system-mips \
    bzip2 \
    curl \
    file

RUN mkdir /toolchain

# The musl cross toolchain is a prebuilt OpenWrt toolchain unpacked into
# /toolchain. Note that this originally came from:
# https://downloads.openwrt.org/snapshots/trunk/malta/generic/OpenWrt-Toolchain-malta-le_gcc-5.3.0_musl-1.1.15.Linux-x86_64.tar.bz2
RUN curl -L https://ci-mirrors.rust-lang.org/libc/OpenWrt-Toolchain-malta-le_gcc-5.3.0_musl-1.1.15.Linux-x86_64.tar.bz2 | \
      tar xjf - -C /toolchain --strip-components=2

# Link with the OpenWrt GCC and run test binaries under qemu-mipsel, using
# /toolchain as the sysroot.
ENV PATH=$PATH:/rust/bin:/toolchain/bin \
    CC_mipsel_unknown_linux_musl=mipsel-openwrt-linux-gcc \
    CARGO_TARGET_MIPSEL_UNKNOWN_LINUX_MUSL_LINKER=mipsel-openwrt-linux-gcc \
    CARGO_TARGET_MIPSEL_UNKNOWN_LINUX_MUSL_RUNNER="qemu-mipsel -L /toolchain"


================================================
FILE: ci/docker/powerpc-unknown-linux-gnu/Dockerfile
================================================
FROM ubuntu:22.04

# Native and PowerPC cross toolchains plus QEMU (user-mode and system).
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        file \
        gcc \
        gcc-powerpc-linux-gnu \
        libc6-dev \
        libc6-dev-powerpc-cross \
        make \
        qemu-system-ppc \
        qemu-user

# Link with the cross GCC and run test binaries under qemu-ppc.
ENV CARGO_TARGET_POWERPC_UNKNOWN_LINUX_GNU_LINKER=powerpc-linux-gnu-gcc \
    CARGO_TARGET_POWERPC_UNKNOWN_LINUX_GNU_RUNNER="qemu-ppc -cpu Vger -L /usr/powerpc-linux-gnu" \
    CC=powerpc-linux-gnu-gcc \
    OBJDUMP=powerpc-linux-gnu-objdump


================================================
FILE: ci/docker/powerpc64-unknown-linux-gnu/Dockerfile
================================================
FROM ubuntu:22.04

# Native and PowerPC64 cross toolchains plus QEMU (user-mode and system).
RUN apt-get update && apt-get install -y --no-install-recommends \
    ca-certificates \
    file \
    gcc \
    gcc-powerpc64-linux-gnu \
    libc6-dev \
    libc6-dev-ppc64-cross \
    make \
    qemu-system-ppc \
    qemu-user

# Link with the cross GCC and run test binaries under qemu-ppc64.
ENV CARGO_TARGET_POWERPC64_UNKNOWN_LINUX_GNU_LINKER=powerpc64-linux-gnu-gcc \
    CARGO_TARGET_POWERPC64_UNKNOWN_LINUX_GNU_RUNNER="qemu-ppc64 -L /usr/powerpc64-linux-gnu" \
    CC=powerpc64-linux-gnu-gcc \
    OBJDUMP=powerpc64-linux-gnu-objdump


================================================
FILE: ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile
================================================
FROM ubuntu:22.04

# Native and little-endian PowerPC64 cross toolchains plus QEMU.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        file \
        gcc \
        gcc-powerpc64le-linux-gnu \
        libc6-dev \
        libc6-dev-ppc64el-cross \
        make \
        qemu-system-ppc \
        qemu-user

# Link with the cross GCC and run test binaries under qemu-ppc64le.
ENV CARGO_TARGET_POWERPC64LE_UNKNOWN_LINUX_GNU_LINKER=powerpc64le-linux-gnu-gcc \
    CARGO_TARGET_POWERPC64LE_UNKNOWN_LINUX_GNU_RUNNER="qemu-ppc64le -L /usr/powerpc64le-linux-gnu" \
    CC=powerpc64le-linux-gnu-gcc \
    OBJDUMP=powerpc64le-linux-gnu-objdump


================================================
FILE: ci/docker/s390x-unknown-linux-gnu/Dockerfile
================================================
FROM ubuntu:22.04

# Native toolchain, s390x cross C/C++ toolchain and QEMU user-mode emulation.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
    ca-certificates \
    cmake \
    curl \
    file \
    g++-s390x-linux-gnu \
    gcc \
    libc6-dev \
    libc6-dev-s390x-cross \
    make \
    qemu-user

# Link with the cross GCC and run test binaries under qemu-s390x.
ENV CARGO_TARGET_S390X_UNKNOWN_LINUX_GNU_LINKER=s390x-linux-gnu-gcc \
    CARGO_TARGET_S390X_UNKNOWN_LINUX_GNU_RUNNER="qemu-s390x -L /usr/s390x-linux-gnu" \
    CC_s390x_unknown_linux_gnu=s390x-linux-gnu-gcc \
    CXX_s390x_unknown_linux_gnu=s390x-linux-gnu-g++ \
    OBJDUMP=s390x-linux-gnu-objdump


================================================
FILE: ci/docker/sparc64-unknown-linux-gnu/Dockerfile
================================================
FROM debian:bookworm

# Native and sparc64 cross toolchains, full-system QEMU with its firmware
# packages, and the tools linux-sparc64.sh needs (curl, 7z, cpio).
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        cpio \
        curl \
        gcc \
        gcc-sparc64-linux-gnu \
        ipxe-qemu \
        libc6-dev \
        libc6-dev-sparc64-cross \
        openbios-sparc \
        p7zip-full \
        qemu-system-sparc64 \
        seabios

# Fetch the kernel and unpack the initrd used to boot the test binary.
COPY linux-sparc64.sh /
RUN bash /linux-sparc64.sh

COPY test-runner-linux /

# Link with the cross GCC; tests run through the system-emulation runner.
ENV CARGO_TARGET_SPARC64_UNKNOWN_LINUX_GNU_LINKER=sparc64-linux-gnu-gcc \
    CARGO_TARGET_SPARC64_UNKNOWN_LINUX_GNU_RUNNER="/test-runner-linux sparc64" \
    CC_sparc64_unknown_linux_gnu=sparc64-linux-gnu-gcc \
    PATH=$PATH:/rust/bin


================================================
FILE: ci/docker/thumbv7neon-linux-androideabi/Dockerfile
================================================
# Build/test image for the thumbv7neon Android target. Tests are pushed to an
# Android emulator that the entrypoint starts in the background.
FROM ubuntu:16.04

# Host tooling; the i386 architecture is enabled for the 32-bit libstdc++.
RUN dpkg --add-architecture i386 && \
    apt-get update && \
    apt-get install -y --no-install-recommends \
  file \
  make \
  curl \
  ca-certificates \
  python \
  unzip \
  expect \
  openjdk-9-jre \
  libstdc++6:i386 \
  libpulse0 \
  gcc \
  libc6-dev

WORKDIR /android/
COPY android* /android/

# The NDK/SDK are installed for the generic `arm` architecture.
ENV ANDROID_ARCH=arm
ENV PATH=$PATH:/android/ndk-$ANDROID_ARCH/bin:/android/sdk/tools:/android/sdk/platform-tools

RUN sh /android/android-install-ndk.sh $ANDROID_ARCH
RUN sh /android/android-install-sdk.sh $ANDROID_ARCH
# HOME is set to /tmp below, so the emulator configuration is moved there.
RUN mv /root/.android /tmp
RUN chmod 777 -R /tmp/.android
RUN chmod 755 /android/sdk/tools/* /android/sdk/emulator/qemu/linux-x86_64/*

# Cargo links with the NDK GCC and runs test binaries through /tmp/runtest,
# which the entrypoint compiles from runtest.rs.
ENV PATH=$PATH:/rust/bin \
    CARGO_TARGET_THUMBV7NEON_LINUX_ANDROIDEABI_LINKER=arm-linux-androideabi-gcc \
    CARGO_TARGET_THUMBV7NEON_LINUX_ANDROIDEABI_RUNNER=/tmp/runtest \
    OBJDUMP=arm-linux-androideabi-objdump \
    HOME=/tmp

ADD runtest-android.rs /tmp/runtest.rs
# The entrypoint starts the emulator in the background, compiles the test
# runner, then exec's the command given to `docker run`. The trailing "--"
# becomes $0 of the `bash -c` script so that "$@" is exactly that command.
ENTRYPOINT [ \
  "bash", \
  "-c", \
  # set SHELL so android can detect a 64bits system, see
  # http://stackoverflow.com/a/41789144
  "SHELL=/bin/dash /android/sdk/emulator/emulator @arm -no-window & \
   rustc /tmp/runtest.rs -o /tmp/runtest && \
   exec \"$@\"", \
  "--" \
]


================================================
FILE: ci/docker/thumbv7neon-unknown-linux-gnueabihf/Dockerfile
================================================
FROM ubuntu:18.04

# Native and hard-float ARM cross toolchains plus QEMU user-mode emulation.
RUN apt-get update && apt-get install -y --no-install-recommends \
  ca-certificates \
  file \
  gcc \
  gcc-arm-linux-gnueabihf \
  libc6-dev \
  libc6-dev-armhf-cross \
  make \
  qemu-user

# Link with the cross GCC and run test binaries under qemu-arm.
ENV CARGO_TARGET_THUMBV7NEON_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc \
    CARGO_TARGET_THUMBV7NEON_UNKNOWN_LINUX_GNUEABIHF_RUNNER="qemu-arm -L /usr/arm-linux-gnueabihf" \
    OBJDUMP=arm-linux-gnueabihf-objdump


================================================
FILE: ci/docker/wasm32-unknown-unknown/Dockerfile
================================================
# Build/test image for wasm32-unknown-unknown: wabt is built from source,
# while wasm-bindgen's test runner and node are downloaded as prebuilt
# archives.
FROM ubuntu:22.04

RUN apt-get update -y && apt-get install -y --no-install-recommends \
  ca-certificates \
  clang \
  cmake \
  curl \
  git \
  libc6-dev \
  make \
  ninja-build \
  python-is-python3 \
  xz-utils

# Install `wasm2wat`
# (built from the wabt sources; /wabt/bin is added to PATH below)
RUN git clone --recursive https://github.com/WebAssembly/wabt
RUN make -C wabt -j$(nproc)
ENV PATH=$PATH:/wabt/bin

# Install `wasm-bindgen-test-runner`
RUN curl -L https://github.com/rustwasm/wasm-bindgen/releases/download/0.2.87/wasm-bindgen-0.2.87-x86_64-unknown-linux-musl.tar.gz \
  | tar xzf -
# Keep the wasm-bindgen version above and below in sync with the version in
# Cargo.toml.
ENV PATH=$PATH:/wasm-bindgen-0.2.87-x86_64-unknown-linux-musl
ENV CARGO_TARGET_WASM32_UNKNOWN_UNKNOWN_RUNNER=wasm-bindgen-test-runner

# Install `node`
RUN curl https://nodejs.org/dist/v14.16.0/node-v14.16.0-linux-x64.tar.xz | tar xJf -
ENV PATH=$PATH:/node-v14.16.0-linux-x64/bin

# We use a shim linker that removes `--strip-debug` when passed to LLD. While
# this typically results in invalid debug information in release mode it doesn't
# result in an invalid names section which is what we're interested in.
COPY lld-shim.rs /
ENV CARGO_TARGET_WASM32_UNKNOWN_UNKNOWN_LINKER=/tmp/lld-shim

# Rustc isn't available until this container starts, so defer compilation of the
# shim.
# NOTE(review): this shell-form entrypoint appears to rely on the `docker run`
# arguments reaching the `sh -c` script as positional parameters, with the
# first one (`bash` in ci/run-docker.sh) becoming $0, which is why `bash` is
# spelled out again before "$@" — confirm against the Docker version in use.
ENTRYPOINT /rust/bin/rustc /lld-shim.rs -o /tmp/lld-shim && exec bash "$@"


================================================
FILE: ci/docker/x86_64-linux-android/Dockerfile
================================================
# Build image for x86_64 Android: installs the NDK and an Android system
# image, but no emulator.
FROM ubuntu:20.04

RUN apt-get update && \
    apt-get install -y --no-install-recommends \
  ca-certificates \
  curl \
  gcc \
  libc-dev \
  python \
  unzip \
  file \
  make

WORKDIR /android/
ENV ANDROID_ARCH=x86_64
COPY android-install-ndk.sh /android/
# NOTE(review): the other Android images pass $ANDROID_ARCH as an argument;
# here the script is called without one and presumably reads ANDROID_ARCH
# from the environment — confirm against android-install-ndk.sh.
RUN sh /android/android-install-ndk.sh

# NOTE(review): ci/run.sh exports STDSIMD_ASSERT_INSTR_LIMIT, whereas this
# sets the STDARCH_ spelling — confirm which name the verify crate reads.
ENV STDARCH_ASSERT_INSTR_LIMIT=30

# We do not run x86_64-linux-android tests on an android emulator.
# See ci/android-sysimage.sh for information about how tests are run.
COPY android-sysimage.sh /android/
RUN bash /android/android-sysimage.sh x86_64 x86_64-24_r07.zip

# Use the NDK's clang (API level 21) as linker and C/C++ compiler.
ENV PATH=$PATH:/rust/bin:/android/ndk/toolchains/llvm/prebuilt/linux-x86_64/bin \
    CARGO_TARGET_X86_64_LINUX_ANDROID_LINKER=x86_64-linux-android21-clang \
    CC_x86_64_linux_android=x86_64-linux-android21-clang \
    CXX_x86_64_linux_android=x86_64-linux-android21-clang++ \
    OBJDUMP=llvm-objdump \
    HOME=/tmp


================================================
FILE: ci/docker/x86_64-unknown-linux-gnu/Dockerfile
================================================
FROM ubuntu:18.04

# Native toolchain plus cmake and clang/libclang.
RUN apt-get update && apt-get install -y --no-install-recommends \
  ca-certificates \
  clang \
  cmake \
  file \
  gcc \
  libc6-dev \
  libclang-dev \
  make


================================================
FILE: ci/docker/x86_64-unknown-linux-gnu-emulated/Dockerfile
================================================
FROM ubuntu:18.04

# Native toolchain, cmake and clang/libclang, plus wget and bzip2 for
# fetching and unpacking the Intel SDE archive.
RUN apt-get update && apt-get install -y --no-install-recommends \
  bzip2 \
  ca-certificates \
  clang \
  cmake \
  file \
  gcc \
  libc6-dev \
  libclang-dev \
  make \
  wget

# Download and unpack Intel SDE; cargo runs every test binary through sde64.
RUN wget https://github.com/gnzlbg/intel_sde/raw/master/sde-external-8.16.0-2018-01-30-lin.tar.bz2
RUN tar -xjf sde-external-8.16.0-2018-01-30-lin.tar.bz2
ENV CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER="/sde-external-8.16.0-2018-01-30-lin/sde64 --"


================================================
FILE: ci/dox.sh
================================================
#!/bin/sh
# Builds the API documentation and the performance guide into target/doc and,
# on Travis master builds that are not pull requests, publishes the result to
# the gh-pages branch.

set -ex

# Start from an empty output directory.
rm -rf target/doc
mkdir -p target/doc

# Build API documentation
cargo doc --features=into_bits

# Build Performance Guide
# FIXME: https://github.com/rust-lang-nursery/mdBook/issues/780
# mdbook build perf-guide -d target/doc/perf-guide
# Until that is fixed, build inside perf-guide and copy the generated book.
cd perf-guide
mdbook build
cd -
cp -r perf-guide/book target/doc/perf-guide

# If we're on travis, not a PR, and on the right branch, publish!
if [ "$TRAVIS_PULL_REQUEST" = "false" ] && [ "$TRAVIS_BRANCH" = "master" ]; then
  # Print tool versions for debugging; with `set -e`, a missing interpreter
  # aborts the script here.
  python3 -vV
  pip -vV
  python3.9 -vV
  pip install ghp_import --user
  # Commit target/doc to the local gh-pages branch (-n adds .nojekyll) ...
  ghp-import -n target/doc
  # ... and force-push it using the token in GH_PAGES.
  git push -qf https://${GH_PAGES}@github.com/${TRAVIS_REPO_SLUG}.git gh-pages
fi


================================================
FILE: ci/linux-s390x.sh
================================================
# Prepares /qemu for booting an s390x kernel under QEMU: downloads firmware,
# kernel and initrd, then unpacks the initrd into /qemu/init.
set -ex

mkdir -m 777 /qemu
cd /qemu

# NOTE(review): the Debian installer URLs are pinned to the 20170828 build
# under `testing`; confirm they still resolve.
curl -LO https://github.com/qemu/qemu/raw/master/pc-bios/s390-ccw.img
curl -LO http://ftp.debian.org/debian/dists/testing/main/installer-s390x/20170828/images/generic/kernel.debian
curl -LO http://ftp.debian.org/debian/dists/testing/main/installer-s390x/20170828/images/generic/initrd.debian

mv kernel.debian kernel
mv initrd.debian initrd.gz

# Unpack the initrd so that files can be added to it later.
mkdir init
cd init
gunzip -c ../initrd.gz | cpio -id
rm ../initrd.gz
# Add the cross libgcc runtime to the initrd's usr/lib.
cp /usr/s390x-linux-gnu/lib/libgcc_s.so.1 usr/lib/
# Leave the unpacked tree writable for every user.
chmod a+w .


================================================
FILE: ci/linux-sparc64.sh
================================================
# Prepares /qemu for booting a sparc64 kernel under QEMU: extracts kernel and
# initrd from a Debian netinst ISO, then unpacks the initrd into /qemu/init
# (the directory ci/test-runner-linux repacks with the test binary).
set -ex

mkdir -m 777 /qemu
cd /qemu

# Only the initrd and the kernel are needed from the ISO; it is deleted
# afterwards.
curl -LO https://cdimage.debian.org/cdimage/ports/9.0/sparc64/iso-cd/debian-9.0-sparc64-NETINST-1.iso
7z e debian-9.0-sparc64-NETINST-1.iso boot/initrd.gz
7z e debian-9.0-sparc64-NETINST-1.iso boot/sparc64
mv sparc64 kernel
rm debian-9.0-sparc64-NETINST-1.iso

# Unpack the initrd so that files can be added to it later.
mkdir init
cd init
gunzip -c ../initrd.gz | cpio -id
rm ../initrd.gz
# Add the cross libgcc runtime to the initrd's usr/lib.
cp /usr/sparc64-linux-gnu/lib/libgcc_s.so.1 usr/lib/
# Leave the unpacked tree writable for every user.
chmod a+w .


================================================
FILE: ci/lld-shim.rs
================================================
use std::os::unix::prelude::*;
use std::process::Command;
use std::env;

// Linker shim: replaces the current process with `rust-lld`, forwarding all
// arguments except `--strip-debug`.
fn main() {
    let mut forwarded = Vec::new();
    for arg in env::args().skip(1) {
        if arg != "--strip-debug" {
            forwarded.push(arg);
        }
    }
    // `exec` only returns when replacing the process failed.
    let error = Command::new("rust-lld").args(&forwarded).exec();
    panic!("failed to exec: {}", error);
}


================================================
FILE: ci/max_line_width.sh
================================================
#!/usr/bin/env sh
# Reports the first Rust source file containing a line of 79 or more
# characters (lines mentioning `http` are exempt) and exits with status 1.

set -x

export success=true

# The loop runs as the last stage of the pipeline, so its `exit 1` becomes the
# exit status of the pipeline and therefore of the script.
find . -iname '*.rs' | while read -r file; do
    result=$(grep '.\{79\}' "${file}" | grep --invert 'http')
    if [ -n "${result}" ]; then
        echo "file \"${file}\": $result"
        exit 1
    fi
done



================================================
FILE: ci/run-docker.sh
================================================
# Small script to run tests for a target (or all targets) inside all the
# respective docker images.
#
# Usage: TARGET=<dir under ci/docker> ci/run-docker.sh
# With TARGET unset, every directory under ci/docker/ is built and run.

set -ex

# run <docker-target>
#
# Builds ci/docker/<docker-target>/Dockerfile and runs ci/run.sh inside it.
# The source tree is mounted read-only at /checkout, with target/ mounted
# writable on top and the host's rustc sysroot mounted at /rust. A trailing
# `-emulated` is stripped from the name before it is passed to the container
# as TARGET.
run() {
    # Use the argument rather than the global TARGET, which is empty when the
    # loop below iterates over all images.
    docker_target="$1"
    echo "Building docker container for TARGET=${docker_target} RUSTFLAGS=${RUSTFLAGS}"
    docker build -t packed_simd -f "ci/docker/${docker_target}/Dockerfile" ci/
    mkdir -p target
    target=$(echo "${docker_target}" | sed 's/-emulated//')
    echo "Running docker"
    docker run \
      --user "$(id -u):$(id -g)" \
      --rm \
      --init \
      --volume "${HOME}/.cargo:/cargo" \
      --env CARGO_HOME=/cargo \
      --volume "$(rustc --print sysroot):/rust:ro" \
      --env TARGET="${target}" \
      --env NORUN \
      --env NOVERIFY \
      --env RUSTFLAGS \
      --volume "$(pwd):/checkout:ro" \
      --volume "$(pwd)/target:/checkout/target" \
      --workdir /checkout \
      --privileged \
      packed_simd \
      bash \
      -c 'PATH=$PATH:/rust/bin exec ci/run.sh'
}

if [ -z "${TARGET}" ]; then
  # Glob instead of parsing `ls`; each match is a directory path with a
  # trailing slash, which basename removes.
  for d in ci/docker/*/; do
    run "$(basename "${d}")"
  done
else
  run "${TARGET}"
fi


================================================
FILE: ci/run.sh
================================================
#!/usr/bin/env bash
# Builds and tests the crate for ${TARGET}. Setting NORUN=1 builds without
# running the tests; setting NOVERIFY=1 skips the code-generation check.

set -ex

# Abort with a message when TARGET is unset.
: ${TARGET?"The TARGET environment variable must be set."}

# Debugging knobs, currently disabled:
# Tests are all super fast anyway, and they fault often enough on travis that
# having only one thread increases debuggability to be worth it.
#export RUST_TEST_THREADS=1
#export RUST_BACKTRACE=full
#export RUST_TEST_NOCAPTURE=1

# Some appveyor builds run out-of-memory; this attempts to mitigate that:
# https://github.com/rust-lang-nursery/packed_simd/issues/39
# export RUSTFLAGS="${RUSTFLAGS} -C codegen-units=1"
# export CARGO_BUILD_JOBS=1

# `cargo test` by default, `cargo build` when NORUN=1.
export CARGO_SUBCMD=test
if [[ "${NORUN}" == "1" ]]; then
    export CARGO_SUBCMD=build
fi

# iOS simulator targets: compile the helper that deploys and runs a test
# binary on the simulator, and register it as the cargo runner for both.
if [[ ${TARGET} == "x86_64-apple-ios" ]] || [[ ${TARGET} == "i386-apple-ios" ]]; then
    export RUSTFLAGS="${RUSTFLAGS} -Clink-arg=-mios-simulator-version-min=7.0"
    rustc ./ci/deploy_and_run_on_ios_simulator.rs -o $HOME/runtest
    export CARGO_TARGET_X86_64_APPLE_IOS_RUNNER=$HOME/runtest
    export CARGO_TARGET_I386_APPLE_IOS_RUNNER=$HOME/runtest
fi

# The source directory is read-only. Need to copy internal crates to the target
# directory for their Cargo.lock to be properly written.
mkdir target || true

# Log the toolchain and the relevant environment for debugging.
rustc --version
cargo --version
echo "TARGET=${TARGET}"
echo "HOST=${HOST}"
echo "RUSTFLAGS=${RUSTFLAGS}"
echo "NORUN=${NORUN}"
echo "NOVERIFY=${NOVERIFY}"
echo "CARGO_SUBCMD=${CARGO_SUBCMD}"
echo "CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS}"
echo "CARGO_INCREMENTAL=${CARGO_INCREMENTAL}"
echo "RUST_TEST_THREADS=${RUST_TEST_THREADS}"
echo "RUST_BACKTRACE=${RUST_BACKTRACE}"
echo "RUST_TEST_NOCAPTURE=${RUST_TEST_NOCAPTURE}"

# cargo_test [cargo args...]
#
# Runs `cargo ${CARGO_SUBCMD}` for ${TARGET} with the given extra arguments.
# Output is captured in target/output and only shown when cargo fails, in
# which case the function returns 1.
cargo_test() {
    cmd="cargo ${CARGO_SUBCMD} --verbose --target=${TARGET} ${@}"
    # `-- --quiet` is only passed when tests are actually run, and never for
    # wasm32-unknown-unknown.
    if [ "${NORUN}" != "1" ] && [ "$TARGET" != "wasm32-unknown-unknown" ]; then
        cmd="$cmd -- --quiet"
    fi
    mkdir target || true
    ${cmd} 2>&1 | tee > target/output
    # Exit status of cargo itself, not of tee.
    cargo_status=${PIPESTATUS[0]}
    if [[ ${cargo_status} != 0 ]]; then
        cat target/output
        return 1
    fi
}

# cargo_test_impl [cargo args...]
#
# Runs cargo_test three times, each with a different group of `--cfg test_v*`
# flags appended to RUSTFLAGS, then restores RUSTFLAGS.
cargo_test_impl() {
    ORIGINAL_RUSTFLAGS=${RUSTFLAGS}
    for width_cfgs in \
        "--cfg test_v16  --cfg test_v32 --cfg test_v64" \
        "--cfg test_v128 --cfg test_v256" \
        "--cfg test_v512"
    do
        RUSTFLAGS="${ORIGINAL_RUSTFLAGS} ${width_cfgs}" cargo_test ${@}
    done
    RUSTFLAGS=${ORIGINAL_RUSTFLAGS}
}

# Debug run:
if [[ "${TARGET}" != "wasm32-unknown-unknown" ]]; then
   # Run wasm32-unknown-unknown in release mode only
   cargo_test_impl
fi

# Release run. Both branches are currently identical; they are kept apart so
# that the extra features can be re-enabled per platform (see the FIXMEs).
if [[ "${TARGET}" == "x86_64-unknown-linux-gnu" ]] || [[ "${TARGET}" == "x86_64-pc-windows-msvc" ]]; then
    # use sleef on linux and windows x86_64 builds
    # FIXME: Use `core_arch,sleef-sys` features once they work again
    cargo_test_impl --release --features=into_bits
else
    # FIXME: Use `core_arch` feature once it works again
    cargo_test_impl --release --features=into_bits
fi

# Verify code generation
if [[ "${NOVERIFY}" != "1" ]]; then
    # The source tree may be read-only, so run the verify crate from a copy
    # under target/.
    cp -r verify/verify target/verify
    # NOTE(review): presumably the maximum instruction count accepted by the
    # verify crate's assertions; i586 gets a higher limit — confirm.
    export STDSIMD_ASSERT_INSTR_LIMIT=30
    if [[ "${TARGET}" == "i586-unknown-linux-gnu" ]]; then
        export STDSIMD_ASSERT_INSTR_LIMIT=50
    fi
    cargo_test --release --manifest-path=target/verify/Cargo.toml
fi

# FIXME: Figure out which examples take too long to run and ignore or adjust those
#. ci/run_examples.sh


================================================
FILE: ci/run_examples.sh
================================================
# Runs all examples.
#
# Meant to be sourced from ci/run.sh (it relies on the `cargo_test` function
# defined there); the `exit 0` guards below therefore end the sourcing script.

# FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/55
# All examples fail to build for `armv7-apple-ios`.
if [[ ${TARGET} == "armv7-apple-ios" ]]; then
    exit 0
fi

# FIXME: travis exceeds 50 minutes on these targets
# Skipping the examples is an attempt at preventing travis from timing-out
# (The Android triples were previously misspelled as `*-androidabi`, so this
# guard never matched them.)
if [[ ${TARGET} == "arm-linux-androideabi" ]] || [[ ${TARGET} == "aarch64-linux-android" ]] \
    || [[ ${TARGET} == "sparc64-unknown-linux-gnu" ]]; then
    exit 0
fi

if [[ ${TARGET} == "wasm32-unknown-unknown" ]]; then
    exit 0
fi

# Each example is copied under target/ before it is built and tested there.

# aobench is exercised twice: without default features and with 256-bit
# vectors.
cp -r examples/aobench target/aobench
for aobench_flag in --no-default-features --features=256bit; do
    cargo_test --manifest-path=target/aobench/Cargo.toml --release ${aobench_flag}
done

for example in dot_product fannkuch_redux mandelbrot matrix_inverse nbody \
               spectral_norm stencil triangle_xform; do
    case "${example}" in
        mandelbrot|stencil)
            # FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/56
            # mandelbrot and stencil are skipped on i586.
            if [[ ${TARGET} == "i586-unknown-linux-gnu" ]]; then
                continue
            fi
            ;;
    esac
    cp -r examples/${example} target/${example}
    cargo_test --manifest-path=target/${example}/Cargo.toml --release
done


================================================
FILE: ci/runtest-android.rs
================================================
use std::env;
use std::process::Command;
use std::path::{Path, PathBuf};

/// Runs one test executable on an attached Android device through `adb`.
///
/// Usage: `runtest-android [--quiet] <path-to-test-binary>`.
///
/// The binary is pushed to `/data/local/tmp/<file name>` and executed with
/// `adb shell`. Its captured output is echoed, and the run is considered
/// successful only if `adb shell` exits successfully and every line starting
/// with `test result` reports `ok` with `0 failed`.
///
/// # Panics
///
/// Panics (failing the test run) if the argument count is wrong, if any `adb`
/// invocation cannot be spawned or exits unsuccessfully, or if no successful
/// `test result` line is found in the output.
fn main() {
    // `--quiet` may be forwarded by the caller; it is not a path, so drop it.
    let args = env::args_os()
        .skip(1)
        .filter(|arg| arg != "--quiet")
        .collect::<Vec<_>>();
    assert_eq!(args.len(), 1);
    let test = PathBuf::from(&args[0]);
    let dst = Path::new("/data/local/tmp").join(test.file_name().unwrap());

    let status = Command::new("adb")
        .arg("wait-for-device")
        .status()
        .expect("failed to run: adb wait-for-device");
    assert!(status.success());

    let status = Command::new("adb")
        .arg("push")
        .arg(&test)
        .arg(&dst)
        .status()
        .expect("failed to run: adb push");
    assert!(status.success());

    let output = Command::new("adb")
        .arg("shell")
        .arg(&dst)
        .output()
        .expect("failed to run: adb shell");

    let stdout = String::from_utf8_lossy(&output.stdout);

    // Echo everything first so that the diagnostics are visible even when one
    // of the checks below fails.
    println!("status: {}\nstdout ---\n{}\nstderr ---\n{}",
             output.status,
             stdout,
             String::from_utf8_lossy(&output.stderr));

    // Check the exit status of `adb shell` itself. The previous code
    // re-asserted the stale `status` of `adb push` here, which was a no-op.
    assert!(output.status.success(),
            "adb shell exited with: {}", output.status);

    // Require at least one summary line: `Iterator::all` is vacuously true on
    // an empty iterator, which would let a binary that printed no summary at
    // all be reported as a success.
    let results = stdout
        .lines()
        .filter(|l| l.starts_with("test result"))
        .collect::<Vec<_>>();
    let all_ok = results
        .iter()
        .all(|l| l.contains("test result: ok") && l.contains("0 failed"));
    if results.is_empty() || !all_ok {
        panic!("failed to find successful test run");
    }
}


================================================
FILE: ci/setup_benchmarks.sh
================================================
#!/usr/bin/env bash

set -ex

# Fetch the prebuilt ISPC binaries and copy the one for ${TARGET} to ./ispc.
# NOTE(review): nothing in this script adds ./ispc to PATH; presumably the
# caller does that - confirm.

# Fail early, with a clear message, instead of cloning and then failing in `cp`.
: "${TARGET:?TARGET must be set to select the ISPC binary}"

git clone https://github.com/gnzlbg/ispc-binaries
cp "ispc-binaries/ispc-${TARGET}" ispc


================================================
FILE: ci/test-runner-linux
================================================
#!/bin/sh
#
# Usage: test-runner-linux <arch> <program>
#
# Packs <program> into the initrd under /qemu as `/prog`, boots it as `init`
# in qemu-system-<arch>, prints the captured output without kernel messages,
# and exits non-zero if that output contains "FAILED".

set -e

arch=$1
prog=$2

cd /qemu/init
cp -f "$prog" prog
find . | cpio --create --format='newc' --quiet | gzip > ../initrd.gz
cd ..

# The emulator is stopped by `timeout`, so its exit status is ignored here;
# success is decided from the captured output below.
timeout 30s "qemu-system-$arch" \
        -m 1024 \
        -nographic \
        -kernel kernel \
        -initrd initrd.gz \
        -append init=/prog > output || true

# remove kernel messages
tr -d '\r' < output | grep -E -v '^\['

# if the output contains a failure, return error
! grep -q FAILED output


================================================
FILE: contributing.md
================================================
# Contributing to `packed_simd`

Welcome! If you are reading this document, it means you are interested in contributing
to the `packed_simd` crate.

## Reporting issues

All issues with this crate are tracked using GitHub's [Issue Tracker].

You can use issues to bring bugs to the attention of the maintainers, to discuss
certain problems encountered with the crate, or to request new features (although
feature requests should be limited to things mentioned in the [RFC]).

One thing to keep in mind is to always use the **latest** nightly toolchain when
working on this crate. Due to the nature of this project, we use a lot of unstable
features, meaning breakage happens often.

[Issue Tracker]: https://github.com/rust-lang-nursery/packed_simd/issues
[RFC]: https://github.com/rust-lang/rfcs/pull/2366

### LLVM issues

The Rust compiler relies on [LLVM](https://llvm.org/) for machine code generation,
and quite a few LLVM bugs have been discovered during the development of this project.

If you encounter issues with incorrect/suboptimal codegen, which you do not encounter
when using the [SIMD vendor intrinsics](https://doc.rust-lang.org/nightly/std/arch/),
it is likely the issue is with LLVM, or this crate's interaction with it.

You should first open an issue **in this repo** to help us track the problem, and we
will help determine what is the exact cause of the problem.
If LLVM is indeed the cause, the issue will be reported upstream to the
[LLVM bugtracker](https://bugs.llvm.org/).

## Submitting Pull Requests

New code is submitted to the crate using GitHub's [pull request] mechanism.
You should first fork this repository, make your changes (preferably in a new
branch), then use GitHub's web UI to create a new PR.

[pull request]: https://help.github.com/articles/about-pull-requests/

### Examples

The `examples` directory contains code showcasing SIMD code written with this crate,
usually in comparison to scalar or ISPC code. If you have a project / idea which
uses SIMD, we'd love to add it to the examples list.

Every example should include a small `README`, describing the example code's purpose.
If your example could potentially work as a benchmark, then add a `benchmark.sh`
script to allow running the example benchmark code in CI. See an existing example's
[`benchmark.sh`](examples/aobench/benchmark.sh) for a sample.

Don't forget to update the crate's top-level `README` with a link to your example.

### Perf guide

The objective of the [performance guide][perf-guide] is to be a comprehensive
resource detailing the process of optimizing Rust code with SIMD support.

If you believe a certain section could be reworded, or if you have any tips & tricks
related to SIMD which you'd like to share, please open a PR.

[mdBook] is used to manage the formatting of the guide as a book.

[perf-guide]: https://rust-lang-nursery.github.io/packed_simd/perf-guide/
[mdBook]: https://github.com/rust-lang-nursery/mdBook


================================================
FILE: examples/Cargo.toml
================================================
# FIXME: Many members of this workspace, including aobench, mandelbrot, and stencil,
# currently trigger a "null pointer deref" warning.
# This is likely due to unsoundness inside packed_simd.
[workspace]
members = [
    "aobench",
    "dot_product",
    "fannkuch_redux",
    "mandelbrot",
    "matrix_inverse",
    "nbody",
    "options_pricing",
    "slice_sum",
    "spectral_norm",
    "stencil",
    "triangle_xform",
]

[profile.release]
# Remember to uncomment this when profiling
# debug = 2

# You can set the following to lto = 'thin' and 'codegen-units=16'
# for better compile times at the cost of performance
lto = 'fat'
codegen-units = 1
incremental = false
panic = 'abort'

[profile.bench]
# Same as above
lto = 'fat'
codegen-units = 1
incremental = false

================================================
FILE: examples/aobench/Cargo.toml
================================================
[package]
name = "aobench"
version = "0.1.0"
authors = ["gnzlbg <gonzalobg88@gmail.com>"]
autobenches = false
edition = "2018"

[[bin]]
name = "aobench"
path = "src/main.rs"

[lib]
name = "aobench_lib"
path = "src/lib.rs"

[dependencies]
structopt = "^0.3"
failure = "^0.1"
png = "^0.15"
packed_simd = { package = "packed_simd", path = "../.." }
rayon = "^1.0"
time = "^0.1"
cfg-if = "^0.1"
ispc = { version = "^1.0.4", optional = true }

[build-dependencies]
ispc = { version = "^1.0.4", optional = true }

[dev-dependencies]
criterion = { version = '^0.3', features=['real_blackbox'] }

[features]
default = [ "256bit" ]
256bit = []
sleef-sys = [ "packed_simd/sleef-sys" ]
core_arch = [ "packed_simd/core_arch" ]

[[bench]]
name = "isec_sphere"
path = "benches/isec_sphere.rs"
harness = false

[[bench]]
name = "isec_plane"
path = "benches/isec_plane.rs"
harness = false

[[bench]]
name = "ambient_occlusion"
path = "benches/ambient_occlusion.rs"
harness = false

[[bench]]
name = "random"
path = "benches/random.rs"
harness = false


================================================
FILE: examples/aobench/benches/ambient_occlusion.rs
================================================
//! Benchmarks the scalar and vector ambient occlusion algorithms
#![feature(stdsimd)]

use aobench_lib::*;
use criterion::*;
use intersection::Isect;
use aobench_lib::scene::Test;

/// Registers the `scalar/ao_hit` benchmark: one call to
/// `ambient_occlusion::scalar` per iteration on a default `Test` scene and a
/// default `Isect`.
fn hit_scalar(c: &mut Criterion) {
    let mut scene = Test::default();

    let ao_hit = Benchmark::new("ao_hit", move |bencher| {
        bencher.iter(|| {
            let mut hit = Isect::default();
            // Hide the inputs from the optimizer.
            let hit = black_box(&mut hit);
            let world = black_box(&mut scene);
            let mut ao = ambient_occlusion::scalar(world, hit);
            black_box(&mut ao);
        })
    })
    .throughput(Throughput::Elements(1));

    c.bench("scalar", ao_hit);
}

/// Registers the `vector/ao_hit` benchmark: one call to
/// `ambient_occlusion::vector` per iteration on a default `Test` scene and a
/// default `Isect`.
fn hit_vector(c: &mut Criterion) {
    let mut scene = Test::default();

    let ao_hit = Benchmark::new("ao_hit", move |bencher| {
        bencher.iter(|| {
            let mut hit = Isect::default();
            // Hide the inputs from the optimizer.
            let hit = black_box(&mut hit);
            let world = black_box(&mut scene);
            let mut ao = ambient_occlusion::vector(world, hit);
            black_box(&mut ao);
        })
    })
    .throughput(Throughput::Elements(1));

    c.bench("vector", ao_hit);
}

criterion_group!(benches, hit_scalar, hit_vector);
criterion_main!(benches);


================================================
FILE: examples/aobench/benches/isec_plane.rs
================================================
//! Benchmarks intersection between rays and planes
#![feature(stdsimd)]

use criterion::*;

use crate::geometry::{f32xN, Plane, Ray, RayxN, V3DxN, V3D};
use crate::intersection::{Intersect, Isect, IsectxN};
use aobench_lib::*;

/// Registers `scalar/isec_plane_hit`: a scalar ray from the origin along +z
/// against a plane at z = 10; every iteration asserts that the ray hits.
fn hit_scalar(c: &mut Criterion) {
    let mut plane = Plane {
        p: V3D { x: 0., y: 0., z: 10. },
        n: V3D { x: 0., y: 0., z: 1. },
    };
    let mut ray = Ray {
        origin: V3D { x: 0., y: 0., z: 0. },
        dir: V3D { x: 0., y: 0., z: 1. },
    };

    let bench = Benchmark::new("isec_plane_hit", move |bencher| {
        bencher.iter(|| {
            let mut start = Isect::default();
            // Hide the inputs from the optimizer.
            let start = black_box(&mut start);
            let target = black_box(&mut plane);
            let probe = black_box(&mut ray);
            let mut result = probe.intersect(target, *start);
            black_box(&mut result);
            assert_eq!(result.hit, true);
        })
    })
    .throughput(Throughput::Elements(1));

    c.bench("scalar", bench);
}

/// Registers `scalar/isec_plane_miss`: a scalar ray from the origin along +z
/// against a plane at z = -10; every iteration asserts that the ray misses.
fn miss_scalar(c: &mut Criterion) {
    let mut plane = Plane {
        p: V3D { x: 0., y: 0., z: -10. },
        n: V3D { x: 0., y: 0., z: 1. },
    };
    let mut ray = Ray {
        origin: V3D { x: 0., y: 0., z: 0. },
        dir: V3D { x: 0., y: 0., z: 1. },
    };

    let bench = Benchmark::new("isec_plane_miss", move |bencher| {
        bencher.iter(|| {
            let mut start = Isect::default();
            // Hide the inputs from the optimizer.
            let start = black_box(&mut start);
            let target = black_box(&mut plane);
            let probe = black_box(&mut ray);
            let mut result = probe.intersect(target, *start);
            black_box(&mut result);
            assert_eq!(result.hit, false);
        })
    })
    .throughput(Throughput::Elements(1));

    c.bench("scalar", bench);
}

/// Registers `vector/isec_plane_hit`: a ray packet from the origin along +z
/// against a plane at z = 10; every iteration asserts that all lanes hit.
/// Throughput is reported as `f32xN::lanes()` elements per iteration.
fn hit_vector(c: &mut Criterion) {
    let zero = f32xN::splat(0.);
    let one = f32xN::splat(1.);

    let mut plane = Plane {
        p: V3D { x: 0., y: 0., z: 10. },
        n: V3D { x: 0., y: 0., z: 1. },
    };
    let mut rays = RayxN {
        origin: V3DxN { x: zero, y: zero, z: zero },
        dir: V3DxN { x: zero, y: zero, z: one },
    };

    let bench = Benchmark::new("isec_plane_hit", move |bencher| {
        bencher.iter(|| {
            let mut start = IsectxN::default();
            // Hide the inputs from the optimizer.
            let start = black_box(&mut start);
            let target = black_box(&mut plane);
            let packet = black_box(&mut rays);
            let mut result = packet.intersect(target, *start);
            black_box(&mut result);
            assert_eq!(result.hit.all(), true);
        })
    })
    .throughput(Throughput::Elements(f32xN::lanes() as u64));

    c.bench("vector", bench);
}

/// Registers `vector/isec_plane_miss`: a ray packet from the origin along +z
/// against a plane at z = -10; every iteration asserts that no lane hits.
/// Throughput is reported as `f32xN::lanes()` elements per iteration.
fn miss_vector(c: &mut Criterion) {
    let zero = f32xN::splat(0.);
    let one = f32xN::splat(1.);

    let mut plane = Plane {
        p: V3D { x: 0., y: 0., z: -10. },
        n: V3D { x: 0., y: 0., z: 1. },
    };
    let mut rays = RayxN {
        origin: V3DxN { x: zero, y: zero, z: zero },
        dir: V3DxN { x: zero, y: zero, z: one },
    };

    let bench = Benchmark::new("isec_plane_miss", move |bencher| {
        bencher.iter(|| {
            let mut start = IsectxN::default();
            // Hide the inputs from the optimizer.
            let start = black_box(&mut start);
            let target = black_box(&mut plane);
            let packet = black_box(&mut rays);
            let mut result = packet.intersect(target, *start);
            black_box(&mut result);
            assert_eq!(result.hit.any(), false);
        })
    })
    .throughput(Throughput::Elements(f32xN::lanes() as u64));

    c.bench("vector", bench);
}

criterion_group!(benches, hit_scalar, miss_scalar, hit_vector, miss_vector);
criterion_main!(benches);


================================================
FILE: examples/aobench/benches/isec_sphere.rs
================================================
//! Benchmarks intersection between rays and spheres
#![feature(stdsimd)]

use crate::geometry::{f32xN, Ray, RayxN, Sphere, V3DxN, V3D};
use crate::intersection::{Intersect, Isect, IsectxN};
use aobench_lib::*;
use criterion::*;

/// Registers `scalar/isec_sphere_hit`: a scalar ray from the origin along +z
/// against a unit-radius sphere centered at z = 10; every iteration asserts
/// that the ray hits.
fn hit_scalar(c: &mut Criterion) {
    let mut sphere = Sphere {
        center: V3D { x: 0., y: 0., z: 10. },
        radius: 1.,
    };
    let mut ray = Ray {
        origin: V3D { x: 0., y: 0., z: 0. },
        dir: V3D { x: 0., y: 0., z: 1. },
    };

    let bench = Benchmark::new("isec_sphere_hit", move |bencher| {
        bencher.iter(|| {
            let mut start = Isect::default();
            // Hide the inputs from the optimizer.
            let start = black_box(&mut start);
            let target = black_box(&mut sphere);
            let probe = black_box(&mut ray);
            let mut result = probe.intersect(target, *start);
            black_box(&mut result);
            assert_eq!(result.hit, true);
        })
    })
    .throughput(Throughput::Elements(1));

    c.bench("scalar", bench);
}

/// Registers `scalar/isec_sphere_miss`: a scalar ray from the origin along +z
/// against a unit-radius sphere centered at z = -10; every iteration asserts
/// that the ray misses.
fn miss_scalar(c: &mut Criterion) {
    let mut sphere = Sphere {
        center: V3D { x: 0., y: 0., z: -10. },
        radius: 1.,
    };
    let mut ray = Ray {
        origin: V3D { x: 0., y: 0., z: 0. },
        dir: V3D { x: 0., y: 0., z: 1. },
    };

    let bench = Benchmark::new("isec_sphere_miss", move |bencher| {
        bencher.iter(|| {
            let mut start = Isect::default();
            // Hide the inputs from the optimizer.
            let start = black_box(&mut start);
            let target = black_box(&mut sphere);
            let probe = black_box(&mut ray);
            let mut result = probe.intersect(target, *start);
            black_box(&mut result);
            assert_eq!(result.hit, false);
        })
    })
    .throughput(Throughput::Elements(1));

    c.bench("scalar", bench);
}

/// Registers `vector/isec_sphere_hit`: a ray packet from the origin along +z
/// against a unit-radius sphere centered at z = 10; every iteration asserts
/// that all lanes hit. Throughput is reported as `f32xN::lanes()` elements.
fn hit_vector(c: &mut Criterion) {
    let zero = f32xN::splat(0.);
    let one = f32xN::splat(1.);

    let mut sphere = Sphere {
        center: V3D { x: 0., y: 0., z: 10. },
        radius: 1.,
    };
    let mut rays = RayxN {
        origin: V3DxN { x: zero, y: zero, z: zero },
        dir: V3DxN { x: zero, y: zero, z: one },
    };

    let bench = Benchmark::new("isec_sphere_hit", move |bencher| {
        bencher.iter(|| {
            let mut start = IsectxN::default();
            // Hide the inputs from the optimizer.
            let start = black_box(&mut start);
            let target = black_box(&mut sphere);
            let packet = black_box(&mut rays);
            let mut result = packet.intersect(target, *start);
            black_box(&mut result);
            assert_eq!(result.hit.all(), true);
        })
    })
    .throughput(Throughput::Elements(f32xN::lanes() as u64));

    c.bench("vector", bench);
}

/// Registers `vector/isec_sphere_miss`: a ray packet from the origin along +z
/// against a unit-radius sphere centered at z = -10; every iteration asserts
/// that no lane hits. Throughput is reported as `f32xN::lanes()` elements.
fn miss_vector(c: &mut Criterion) {
    let zero = f32xN::splat(0.);
    let one = f32xN::splat(1.);

    let mut sphere = Sphere {
        center: V3D { x: 0., y: 0., z: -10. },
        radius: 1.,
    };
    let mut rays = RayxN {
        origin: V3DxN { x: zero, y: zero, z: zero },
        dir: V3DxN { x: zero, y: zero, z: one },
    };

    let bench = Benchmark::new("isec_sphere_miss", move |bencher| {
        bencher.iter(|| {
            let mut start = IsectxN::default();
            // Hide the inputs from the optimizer.
            let start = black_box(&mut start);
            let target = black_box(&mut sphere);
            let packet = black_box(&mut rays);
            let mut result = packet.intersect(target, *start);
            black_box(&mut result);
            assert_eq!(result.hit.any(), false);
        })
    })
    .throughput(Throughput::Elements(f32xN::lanes() as u64));

    c.bench("vector", bench);
}

criterion_group!(benches, hit_scalar, miss_scalar, hit_vector, miss_vector);
criterion_main!(benches);


================================================
FILE: examples/aobench/benches/random.rs
================================================
//! Benchmarks the pseudo-random number generators (PRNG)
#![feature(stdsimd)]

use aobench_lib::geometry::f32xN;
use aobench_lib::random;
use criterion::*;

/// Registers `scalar/random`: one `gen()` call per iteration on the generator
/// returned by `random::scalar::thread_rng()`.
fn random_scalar(c: &mut Criterion) {
    let bench = Benchmark::new("random", move |bencher| {
        // The generator is created once per benchmark, not per iteration.
        let mut generator = random::scalar::thread_rng();
        bencher.iter(|| {
            black_box(generator.gen());
        })
    })
    .throughput(Throughput::Elements(1));

    c.bench("scalar", bench);
}

/// Registers `vector/random`: one `gen()` call per iteration on the generator
/// returned by `random::vector::thread_rng()`. Throughput is reported as
/// `f32xN::lanes()` elements per iteration.
fn random_vector(c: &mut Criterion) {
    let bench = Benchmark::new("random", move |bencher| {
        // The generator is created once per benchmark, not per iteration.
        let mut generator = random::vector::thread_rng();
        bencher.iter(|| {
            black_box(generator.gen());
        })
    })
    .throughput(Throughput::Elements(f32xN::lanes() as u64));

    c.bench("vector", bench);
}

criterion_group!(benches, random_scalar, random_vector);
criterion_main!(benches);


================================================
FILE: examples/aobench/benches/scanlines.rs
================================================
#![feature(test)]

use test::{black_box, Bencher};

/// Benchmarks `aobench_lib::scalar::scanlines` over a 50x50 image with three
/// values per pixel.
#[bench]
fn scanlines_scalar(b: &mut Bencher) {
    // Keep the dimensions opaque to the optimizer.
    let width = black_box(50);
    let height = black_box(50);

    let mut pixels = black_box(vec![0.; width * height * 3]);
    b.iter(|| {
        black_box(&mut pixels);
        aobench_lib::scalar::scanlines(0, height, width, height, 2, &mut pixels);
    });
}

/// Benchmarks `aobench_lib::vector::scanlines` over a 50x50 image with three
/// values per pixel.
#[bench]
fn scanlines_vector(b: &mut Bencher) {
    // Keep the dimensions opaque to the optimizer.
    let width = black_box(50);
    let height = black_box(50);

    let mut pixels = black_box(vec![0.; width * height * 3]);
    b.iter(|| {
        black_box(&mut pixels);
        aobench_lib::vector::scanlines(0, height, width, height, 2, &mut pixels);
    });
}


================================================
FILE: examples/aobench/benchmark.sh
================================================
#!/usr/bin/env bash
#
# Runs aobench benchmarks
#
# Environment variables read by this script:
#   FEATURES  - extra cargo features; if it contains "ispc", the ISPC
#               algorithms are benchmarked too (requires `ispc` in PATH)
#   RUSTFLAGS - appended after `-C target-cpu=native`
#   VERIFY    - if "1", also run `cargo test` for the 256-bit configuration
#   NORUN     - if "1", only build (and optionally test); skip the benchmarks

set -ex

# Image dimensions passed to every aobench invocation.
export WIDTH=800
export HEIGHT=600

# hyperfine is only needed when the benchmarks are actually run.
if [[ ${NORUN} != 1 ]]; then
    hash hyperfine 2>/dev/null || { echo >&2 "hyperfine is not in PATH."; exit 1; }
fi

# Algorithms to benchmark; the ISPC ones are added only on request.
ALGS=("scalar" "scalar_par" "vector" "vector_par" "tiled" "tiled_par")
if echo "$FEATURES" | grep -q "ispc"; then
    hash ispc 2>/dev/null || { echo >&2 "ispc is not in PATH."; exit 1; }
    ALGS+=("ispc" "ispc_tasks")
fi

echo "Benchmark 256-bit wide vectors"
RUSTFLAGS="-C target-cpu=native ${RUSTFLAGS}" \
         cargo build --release --no-default-features \
         --features="${FEATURES},256bit"

if [[ "${VERIFY}" == "1" ]]; then
    RUSTFLAGS="-C target-cpu=native ${RUSTFLAGS}" \
    cargo test --release --no-default-features \
          --features="${FEATURES},256bit"
fi

# Build-only mode stops here; the 128-bit configuration is not built.
if [[ "${NORUN}" == "1" ]]; then
    exit 0
fi

for alg in "${ALGS[@]}"
do
    hyperfine "../target/release/aobench ${WIDTH} ${HEIGHT} --algo ${alg}"
done

# Rebuild without the `256bit` feature and run the same algorithms again.
echo "Benchmark 128-bit wide vectors"
RUSTFLAGS="-C target-cpu=native ${RUSTFLAGS}" \
         cargo build --release --no-default-features \
         --features="${FEATURES}"
for alg in "${ALGS[@]}"
do
    hyperfine "../target/release/aobench ${WIDTH} ${HEIGHT} --algo ${alg}"
done


================================================
FILE: examples/aobench/build.rs
================================================
/// Build script: when the `ispc` feature is enabled, compiles the ISPC kernel
/// `volta/ao.ispc` into the `aobench` library for several x86 ISAs.
fn main() {
    println!("cargo:rerun-if-changed=build.rs");

    #[cfg(feature = "ispc")]
    {
        use ispc::opt::TargetISA;

        // Nothing to compile unless cargo reports the feature as enabled.
        if std::env::var("CARGO_FEATURE_ISPC").is_err() {
            return;
        }

        let mut cfg = ispc::Config::new();
        if cfg!(windows) {
            cfg.debug(false);
        }

        for source in ["volta/ao.ispc"].iter() {
            cfg.file(*source);
        }

        cfg.target_isas(vec![
            TargetISA::SSE2i32x4,
            TargetISA::SSE4i32x4,
            TargetISA::AVX1i32x8,
            TargetISA::AVX2i32x8,
            TargetISA::AVX512KNLi32x16,
        ]);

        cfg.compile("aobench");
    }
}


================================================
FILE: examples/aobench/readme.md
================================================
# Ambient Occlusion Benchmark

> Originally written by Syoyo Fujita: https://github.com/syoyo/aobench

`aobench` is a small ambient occlusion renderer for benchmarking real-world
floating point performance in various languages.

![image_vector_par](https://user-images.githubusercontent.com/904614/41043073-653aa5be-69a3-11e8-8a9d-007def8516cc.png)

## Instructions


To run it with the default target options (replace `${NAME}` with an algorithm name):

```
> cargo run --release -- 800 600 --algo ${NAME}
```

Use `RUSTFLAGS` to set the target CPU, for example:

```
> RUSTFLAGS="-C target-cpu=native" cargo run --release -- 800 600 --algo ${NAME}
```

## Results

```
./benchmark.sh
```

On a dual core AVX1 i5 @1.8 GHz:

| 800 x 600    | time [ms] <br> Rust | speedup vs `scalar` [-] |
|--------------|---------------------|-------------------------|
| `scalar`     | 5884                | 1.0x                    |
| `scalar_par` | 2206                | 2.7x                    |
| `vector`     | 1458                | 4.0x                    |
| `vector_par` | 622                 | 9.5x                    |
| `tiled`      | 1328                | 4.4x                    |
| `tiled_par`  | 578                 | 10.2x                   |
| `ispc`       | 1158                | 5.1x                    |
| `ispc_tasks` | 567                 | 10.4x                   |

`tiled_par` is 1.02x slower than `ispc_tasks`.

On a 28 core Xeon CPU E5-2690 v4 @ 2.60GHz:

| 800 x 600    | time [ms] <br> Rust | speedup vs `scalar` [-] |
|--------------|---------------------|-------------------------|
| `scalar`     | 2981                | 1.0x                    |
| `scalar_par` | 163                 | 18.2x                   |
| `vector`     | 692                 | 4.3x                    |
| `vector_par` | 98                  | 30.4x                   |
| `tiled`      | 640                 | 4.7x                    |
| `tiled_par`  | 98                  | 30.4x                   |
| `ispc`       | 576                 | 5.2x                    |
| `ispc_tasks` | 150                 | 19.9x                   |

`tiled_par` is 1.53x faster than `ispc_tasks`.


On a 40 core Xeon Gold 6148 CPU @ 2.40GHz:

| 800 x 600    | time [ms] <br> Rust | speedup vs `scalar` [-] |
|--------------|---------------------|-------------------------|
| `scalar`     | 3215                | 1.0x                    |
| `scalar_par` | 186                 | 17.0x                   |
| `vector`     | 802                 | 4.0x                    |
| `vector_par` | 106                 | 30.3x                   |
| `tiled`      | 770                 | 4.2x                    |
| `tiled_par`  | 102                 | 32.1x                   |
| `ispc`       | 491                 | 6.5x                    |
| `ispc_tasks` | 153                 | 21.7x                   |

`tiled_par` is 1.5x faster than `ispc_tasks`.

## Overview

There are 4 main pieces in the `aobench` benchmark:

* ray-plane intersection algorithm: [source](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/aobench/src/intersection/ray_plane.rs)
* ray-sphere intersection algorithm: [source](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/aobench/src/intersection/ray_sphere.rs)
* ambient occlusion algorithm: [source](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/aobench/src/ambient_occlusion.rs)
* ray-casting the pixels:
  * scalar serial: [source](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/aobench/src/scalar.rs)
  * scalar parallel: [source](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/aobench/src/scalar_parallel.rs)
  * vector serial: [source](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/aobench/src/vector.rs)
  * vector parallel: [source](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/aobench/src/vector_parallel.rs)

The scalar and vectorized implementations of the intersection and ao algorithms
are in the same file so that they can be easily compared.

As a comparison, the ISPC sources of the same benchmark are [here](https://github.com/ispc/ispc/tree/master/examples/aobench).


================================================
FILE: examples/aobench/rustfmt.toml
================================================
max_width = 79

================================================
FILE: examples/aobench/src/ambient_occlusion.rs
================================================
//! Ambient Occlusion implementations

use crate::geometry::{f32xN, Ray, RayxN, Selectable, V3DxN, V3D};
use crate::intersection::{Intersect, Isect, IsectxN};
use crate::scene::Scene;
use std::f32::consts::PI;

/// Scalar ambient occlusion: casts `NAO_SAMPLES * NAO_SAMPLES` random rays,
/// one at a time, from just above the intersection point and returns the
/// fraction of them that hit nothing in the scene.
#[inline(always)]
pub fn scalar<S: Scene>(scene: &mut S, isect: &Isect) -> f32 {
    const EPS: f32 = 0.0001;

    let ntheta: usize = S::NAO_SAMPLES;
    let nphi: usize = S::NAO_SAMPLES;

    let basis = isect.n.ortho_basis();
    // Offset the ray origin along the normal by a small epsilon.
    let origin = isect.p + EPS * isect.n;

    let mut hits: f32 = 0.0;
    for _ in 0..ntheta {
        for _ in 0..nphi {
            // Draw order matters: theta first, then phi.
            let theta = scene.rand().sqrt();
            let phi = 2. * PI * scene.rand();

            let local = V3D {
                x: phi.cos() * theta,
                y: phi.sin() * theta,
                z: (1.0 - theta * theta).sqrt(),
            };
            let ray = Ray {
                origin,
                dir: basis * local,
            };

            let mut nearest = Isect::default();
            for sphere in scene.spheres() {
                nearest = ray.intersect(sphere, nearest);
            }
            nearest = ray.intersect(scene.plane(), nearest);

            if nearest.hit {
                hits += 1.;
            }
        }
    }

    1. - hits / (ntheta * nphi) as f32
}

/// Vectorized ambient occlusion algorithm using ray packets
///
/// Computes the occlusion of a single (scalar) intersection point by casting
/// packets of `f32xN::lanes()` rays at a time, and returns one minus the
/// fraction of rays that hit something.
#[inline(always)]
pub fn vector<S: Scene>(scene: &mut S, isect: &Isect) -> f32 {
    // Per-lane hit counters; reduced with `sum()` at the end.
    let mut occlusion = f32xN::splat(0.0);

    let basis = isect.n.ortho_basis();
    let eps: f32 = 0.0001;
    // Offset the origin along the normal by a small epsilon, then broadcast it
    // so that every ray of a packet starts at the same point.
    let origin = isect.p + eps * isect.n;
    let origin = V3DxN {
        x: f32xN::splat(origin.x),
        y: f32xN::splat(origin.y),
        z: f32xN::splat(origin.z),
    };

    let ntheta: usize = S::NAO_SAMPLES;
    let nphi: usize = S::NAO_SAMPLES;
    for _i in 0..ntheta {
        // Each inner iteration handles `lanes()` samples at once.
        // NOTE(review): if NAO_SAMPLES is not a multiple of `f32xN::lanes()`,
        // more than `nphi` rays are cast per row while the result is still
        // normalised by `ntheta * nphi` - confirm this cannot happen.
        for _j in (0..nphi).step_by(f32xN::lanes()) {
            let (theta, phi) = scene.rand_f32xN();
            // NOTE(review): `sqrte` is presumably an estimated square root,
            // whereas the scalar version uses `sqrt` - confirm.
            let theta = theta.sqrte();
            // NOTE(review): presumably sin/cos of `pi * x`, mirroring the
            // scalar `2 * PI * rand` - confirm.
            let (sin, cos) = (2. * phi).sin_cos_pi();

            let n = V3DxN {
                x: cos * theta,
                y: sin * theta,
                z: (f32xN::splat(1.0) - theta * theta).sqrt(),
            };
            let dir = basis * n;
            let ray = RayxN { origin, dir };

            // Intersect the packet with every sphere and then with the plane.
            let mut occ_isect = IsectxN::default();
            for s in scene.spheres() {
                occ_isect = ray.intersect(s, occ_isect);
            }
            occ_isect = ray.intersect(scene.plane(), occ_isect);

            // Add 1 to each lane whose ray hit something.
            occlusion += occ_isect.hit.sel(f32xN::splat(1.), f32xN::splat(0.));
        }
    }

    1. - occlusion.sum() / (ntheta * nphi) as f32
}

/// Vectorized ambient occlusion algorithm for a packet of intersections
///
/// Unlike `vector`, which takes a single `Isect`, this takes an `IsectxN` and
/// returns one ambient occlusion value per lane. Each of the
/// `NAO_SAMPLES * NAO_SAMPLES` iterations casts one ray packet.
#[inline(always)]
pub fn vector_tiled<S: Scene>(scene: &mut S, isect: &IsectxN) -> f32xN {
    // Per-lane hit counters.
    let mut occlusion = f32xN::splat(0.0);

    let basis = isect.n.ortho_basis();
    let eps = f32xN::splat(0.0001);
    // Offset the ray origins along the normals by a small epsilon.
    let origin = isect.p + eps * isect.n;

    let ntheta: usize = S::NAO_SAMPLES;
    let nphi: usize = S::NAO_SAMPLES;
    for _i in 0..ntheta {
        for _j in 0..nphi {
            let (theta, phi) = scene.rand_f32xN();
            // NOTE(review): `sqrte` is presumably an estimated square root -
            // confirm.
            let theta = theta.sqrte();
            // NOTE(review): presumably sin/cos of `pi * x` - confirm.
            let (sin, cos) = (2. * phi).sin_cos_pi();

            let n = V3DxN {
                x: cos * theta,
                y: sin * theta,
                z: (1.0 - theta * theta).sqrt(),
            };
            let dir = basis * n;
            let ray = RayxN { origin, dir };

            // Intersect the packet with every sphere and then with the plane.
            let mut occ_isect = IsectxN::default();
            for s in scene.spheres() {
                occ_isect = ray.intersect(s, occ_isect);
            }
            occ_isect = ray.intersect(scene.plane(), occ_isect);

            // Add 1 to each lane whose ray hit something.
            occlusion += occ_isect.hit.sel(f32xN::splat(1.), f32xN::splat(0.));
        }
    }

    f32xN::splat(1.) - occlusion / (ntheta * nphi) as f32
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::geometry::V3D;

    /// Casts a ray from the default origin along `(d, d, d)` into the test
    /// scene, checks whether it hits as expected, and then checks that the
    /// scalar and vector ambient occlusion results agree exactly.
    fn check_scalar_matches_vector(d: f32, expect_hit: bool) {
        let scene = crate::scene::Test::default();
        // Each algorithm gets its own copy of the scene.
        let mut scene_scalar = scene.clone();
        let mut scene_vector = scene.clone();

        let ray = Ray {
            origin: V3D::default(),
            dir: V3D { x: d, y: d, z: d },
        };

        let mut isect = Isect::default();
        for sphere in scene.spheres() {
            isect = ray.intersect(sphere, isect);
        }
        isect = ray.intersect(scene.plane(), isect);

        if expect_hit {
            assert!(isect.hit);
        } else {
            assert!(!isect.hit);
        }

        let ao_scalar = scalar(&mut scene_scalar, &isect);
        let ao_vector = vector(&mut scene_vector, &isect);
        assert_eq!(ao_scalar, ao_vector);
    }

    #[test]
    fn sanity_hit() {
        check_scalar_matches_vector(-0.2, true);
    }

    #[test]
    fn sanity_miss() {
        check_scalar_matches_vector(0.2, false);
    }
}


================================================
FILE: examples/aobench/src/geometry/mod.rs
================================================
//! Geometry utilities

use packed_simd::*;

mod plane;
mod ray;
mod sphere;
mod vec;

mod rayxN;
mod vecxN;

pub use self::plane::Plane;
pub use self::ray::Ray;
pub use self::sphere::Sphere;
pub use self::vec::{Dot, M3x3, V3D};

pub use self::rayxN::RayxN;
pub use self::vecxN::{Selectable, V3DxN};

// Lane-width selection: every `xN` alias below resolves to the 8-lane type
// when the `256bit` feature is enabled and to the 4-lane type otherwise.

/// `f32` vector with 8 lanes (`256bit` feature enabled).
#[cfg(feature = "256bit")]
pub type f32xN = f32x8;
/// `u32` vector with 8 lanes (`256bit` feature enabled).
#[cfg(feature = "256bit")]
pub type u32xN = u32x8;
/// `usize` vector with 8 lanes (`256bit` feature enabled).
#[cfg(feature = "256bit")]
pub type usizexN = usizex8;
/// `m32` mask vector with 8 lanes (`256bit` feature enabled).
#[cfg(feature = "256bit")]
pub type m32xN = m32x8;
/// Vector of 8 `*mut f32` pointers (`256bit` feature enabled).
#[cfg(feature = "256bit")]
pub type pf32xN = Simd<[*mut f32; 8]>;

/// `f32` vector with 4 lanes (`256bit` feature disabled).
#[cfg(not(feature = "256bit"))]
pub type f32xN = f32x4;
/// `u32` vector with 4 lanes (`256bit` feature disabled).
#[cfg(not(feature = "256bit"))]
pub type u32xN = u32x4;
/// `usize` vector with 4 lanes (`256bit` feature disabled).
#[cfg(not(feature = "256bit"))]
pub type usizexN = usizex4;
/// `m32` mask vector with 4 lanes (`256bit` feature disabled).
#[cfg(not(feature = "256bit"))]
pub type m32xN = m32x4;
/// Vector of 4 `*mut f32` pointers (`256bit` feature disabled).
#[cfg(not(feature = "256bit"))]
pub type pf32xN = Simd<[*mut f32; 4]>;

/// Construction of a vector from a start value and a per-lane step.
///
/// The implementations in this module fill lane `i` with `x + i * step`.
pub trait IncrV {
    /// Scalar type of the start value and of the step.
    type Element;
    /// Builds the vector from the start value `x` and the increment `step`.
    fn incr(x: Self::Element, step: Self::Element) -> Self;
}

impl IncrV for f32xN {
    type Element = f32;
    /// Returns a vector whose lane `i` holds `x + i * step`.
    #[inline(always)]
    fn incr(x: f32, step: f32) -> Self {
        // Value stored in the lane with (floating-point) index `i`.
        let lane = |i: f32| x + i * step;

        #[cfg(feature = "256bit")]
        {
            Self::new(
                lane(0.),
                lane(1.),
                lane(2.),
                lane(3.),
                lane(4.),
                lane(5.),
                lane(6.),
                lane(7.),
            )
        }
        #[cfg(not(feature = "256bit"))]
        {
            Self::new(lane(0.), lane(1.), lane(2.), lane(3.))
        }
    }
}

impl IncrV for u32xN {
    type Element = u32;
    /// Returns a vector whose lane `i` holds `x + i * step`.
    #[inline(always)]
    fn incr(x: u32, step: u32) -> Self {
        // Value stored in lane `i`.
        let lane = |i: u32| x + i * step;

        #[cfg(feature = "256bit")]
        {
            Self::new(
                lane(0),
                lane(1),
                lane(2),
                lane(3),
                lane(4),
                lane(5),
                lane(6),
                lane(7),
            )
        }
        #[cfg(not(feature = "256bit"))]
        {
            Self::new(lane(0), lane(1), lane(2), lane(3))
        }
    }
}

impl IncrV for usizexN {
    type Element = usize;
    /// Returns a vector whose lane `i` holds `x + i * step`.
    #[inline(always)]
    fn incr(x: usize, step: usize) -> Self {
        // Value stored in lane `i`.
        let lane = |i: usize| x + i * step;

        #[cfg(feature = "256bit")]
        {
            Self::new(
                lane(0),
                lane(1),
                lane(2),
                lane(3),
                lane(4),
                lane(5),
                lane(6),
                lane(7),
            )
        }
        #[cfg(not(feature = "256bit"))]
        {
            Self::new(lane(0), lane(1), lane(2), lane(3))
        }
    }
}


================================================
FILE: examples/aobench/src/geometry/plane.rs
================================================
//! Plane

use crate::geometry::V3D;

/// Plane described by a point and a normal vector.
#[derive(Copy, Clone, Debug)]
pub struct Plane {
    /// A point on the plane.
    pub p: V3D,
    /// Normal of the plane; the ray-plane intersection code copies it into
    /// the intersection record as the surface normal.
    pub n: V3D,
}


================================================
FILE: examples/aobench/src/geometry/ray.rs
================================================
//! A ray

use crate::geometry::V3D;

/// Ray starting at `origin` in `dir` direction.
#[derive(Copy, Clone, Debug)]
pub struct Ray {
    /// Starting point of the ray.
    pub origin: V3D,
    /// Direction of the ray. The intersection code uses it as-is, without
    /// normalizing it.
    pub dir: V3D,
}


================================================
FILE: examples/aobench/src/geometry/rayxN.rs
================================================
//! A packet of `N` rays (4 lanes by default, 8 with the `256bit` feature)

use crate::geometry::{Ray, V3DxN};

/// `N` packed rays starting at `origin` in `dir` direction, one ray per lane.
///
/// `N` is the lane count of `V3DxN`: 4 by default, 8 with the `256bit`
/// feature.
#[derive(Copy, Clone, Debug)]
pub struct RayxN {
    /// Origins of the rays, one per lane.
    pub origin: V3DxN,
    /// Directions of the rays, one per lane.
    pub dir: V3DxN,
}

impl RayxN {
    /// Extracts the ray stored in lane `idx` as a scalar `Ray`.
    pub fn get(&self, idx: usize) -> Ray {
        let origin = self.origin.get(idx);
        let dir = self.dir.get(idx);
        Ray { origin, dir }
    }
}


================================================
FILE: examples/aobench/src/geometry/sphere.rs
================================================
//! Sphere

use crate::geometry::V3D;

/// Sphere described by its center and radius.
#[derive(Copy, Clone, Debug)]
pub struct Sphere {
    /// Center of the sphere.
    pub center: V3D,
    /// Radius of the sphere.
    pub radius: f32,
}


================================================
FILE: examples/aobench/src/geometry/vec.rs
================================================
//! A simple vector type

use std::ops::*;

/// Three-component `f32` vector.
#[derive(Copy, Clone, Debug, PartialEq)]
pub struct V3D {
    /// X component.
    pub x: f32,
    /// Y component.
    pub y: f32,
    /// Z component.
    pub z: f32,
}

impl Default for V3D {
    /// The zero vector.
    #[inline(always)]
    #[must_use]
    fn default() -> Self {
        let zero = 0_f32;
        Self {
            x: zero,
            y: zero,
            z: zero,
        }
    }
}

/// Three `V3D`s used as a 3x3 matrix.
///
/// Multiplying an `M3x3` by a vector `o` yields
/// `o.x * m[0] + o.y * m[1] + o.z * m[2]`, i.e. the entries act as basis
/// vectors weighted by the components of `o`.
pub type M3x3 = [V3D; 3];

impl V3D {
    /// Cross product of `self` and `o`.
    #[inline(always)]
    #[must_use]
    pub fn cross(self, o: Self) -> Self {
        let x = self.y * o.z - self.z * o.y;
        let y = self.z * o.x - self.x * o.z;
        let z = self.x * o.y - self.y * o.x;
        Self { x, y, z }
    }

    /// Returns `self` scaled by the reciprocal of its length.
    #[inline(always)]
    #[must_use]
    pub fn normalized(self) -> Self {
        let len_sq = self.dot(self);
        let inv_len = len_sq.sqrt().recip();
        inv_len * self
    }

    /// Builds a basis `[b0, b1, self]` around `self`.
    ///
    /// A helper axis is picked from the first component of `self` that lies
    /// strictly inside `(-0.6, 0.6)` (falling back to the x axis); `b0` and
    /// `b1` are then derived from it with normalized cross products.
    #[inline(always)]
    #[must_use]
    pub fn ortho_basis(self) -> M3x3 {
        // `true` when a component lies strictly inside (-0.6, 0.6).
        let is_small = |c: f32| c < 0.6 && c > -0.6;

        let mut helper = Self::default();
        if is_small(self.x) {
            helper.x = 1.0;
        } else if is_small(self.y) {
            helper.y = 1.0;
        } else if is_small(self.z) {
            helper.z = 1.0;
        } else {
            helper.x = 1.0;
        }

        let first = helper.cross(self).normalized();
        let second = self.cross(first).normalized();
        [first, second, self]
    }

    /// Fuzzy float comparison between vectors: every component must differ
    /// by less than `1E-3`.
    #[inline(always)]
    #[must_use]
    pub fn almost_eq(&self, rhs: &Self) -> bool {
        const EPSILON: f32 = 1E-3;
        let close = |a: f32, b: f32| (a - b).abs() < EPSILON;
        close(self.x, rhs.x) && close(self.y, rhs.y) && close(self.z, rhs.z)
    }
}

impl Add for V3D {
    type Output = Self;
    /// Component-wise sum.
    #[inline(always)]
    fn add(self, o: Self) -> Self::Output {
        let (x, y, z) = (self.x + o.x, self.y + o.y, self.z + o.z);
        Self { x, y, z }
    }
}

impl Sub for V3D {
    type Output = Self;
    /// Component-wise difference.
    #[inline(always)]
    fn sub(self, o: Self) -> Self::Output {
        let (x, y, z) = (self.x - o.x, self.y - o.y, self.z - o.z);
        Self { x, y, z }
    }
}

impl Mul for V3D {
    type Output = Self;
    /// Component-wise (Hadamard) product.
    // `#[inline(always)]` added for consistency with every other operator
    // implementation in this module.
    #[inline(always)]
    fn mul(self, o: Self) -> Self::Output {
        Self {
            x: self.x * o.x,
            y: self.y * o.y,
            z: self.z * o.z,
        }
    }
}

impl Mul<f32> for V3D {
    type Output = Self;
    /// Scales every component by `o`.
    #[inline(always)]
    fn mul(self, o: f32) -> Self::Output {
        let (x, y, z) = (self.x * o, self.y * o, self.z * o);
        Self { x, y, z }
    }
}

impl Mul<V3D> for f32 {
    type Output = V3D;
    #[inline(always)]
    fn mul(self, o: V3D) -> Self::Output {
        o * self
    }
}

impl Mul<V3D> for M3x3 {
    type Output = V3D;
    /// Returns `o.x * self[0] + o.y * self[1] + o.z * self[2]`, computed as
    /// one dot product per output component.
    #[inline(always)]
    fn mul(self, o: V3D) -> Self::Output {
        let [b0, b1, b2] = self;

        // Gather the x, y and z components of the three basis vectors.
        let xs = V3D {
            x: b0.x,
            y: b1.x,
            z: b2.x,
        };
        let ys = V3D {
            x: b0.y,
            y: b1.y,
            z: b2.y,
        };
        let zs = V3D {
            x: b0.z,
            y: b1.z,
            z: b2.z,
        };

        V3D {
            x: o.dot(xs),
            y: o.dot(ys),
            z: o.dot(zs),
        }
    }
}

/// Vector dot product
pub trait Dot<O> {
    /// Type of the resulting product.
    type Output;
    /// Returns the dot product of `self` with the other operand.
    fn dot(self, _: O) -> Self::Output;
}

impl Dot<V3D> for V3D {
    type Output = f32;
    /// Sum of the component-wise products.
    #[inline(always)]
    fn dot(self, o: Self) -> Self::Output {
        let xx = self.x * o.x;
        let yy = self.y * o.y;
        let zz = self.z * o.z;
        xx + yy + zz
    }
}


================================================
FILE: examples/aobench/src/geometry/vecxN.rs
================================================
//! A packed (SIMD) 3D vector type: one 3D vector per lane

use std::ops::*;

use crate::geometry::{f32xN, m32xN, Dot, M3x3, V3D};

/// Packet of 3D vectors stored component-wise: lane `i` of `x`, `y` and `z`
/// together form the `i`-th vector.
#[derive(Copy, Clone, Debug)]
pub struct V3DxN {
    /// X components, one per lane.
    pub x: f32xN,
    /// Y components, one per lane.
    pub y: f32xN,
    /// Z components, one per lane.
    pub z: f32xN,
}

impl Default for V3DxN {
    /// All lanes hold the zero vector.
    #[inline(always)]
    #[must_use]
    fn default() -> Self {
        let zero = f32xN::splat(0.);
        Self {
            x: zero,
            y: zero,
            z: zero,
        }
    }
}

impl V3DxN {
    /// Scales each lane's vector by `rsqrte` of its squared length.
    #[inline(always)]
    #[must_use]
    pub fn normalized(self) -> Self {
        let len_sq = self.dot(self);
        let inv_len = len_sq.rsqrte();
        inv_len * self
    }

    /// Extracts the vector stored in lane `idx` as a scalar `V3D`.
    pub fn get(&self, idx: usize) -> V3D {
        let x = self.x.extract(idx);
        let y = self.y.extract(idx);
        let z = self.z.extract(idx);
        V3D { x, y, z }
    }

    /// Lane-wise counterpart of `V3D::ortho_basis`: builds `[b0, b1, self]`.
    ///
    /// Per lane, the helper axis is the first component of `self` lying
    /// strictly inside `(-0.6, 0.6)`, falling back to the x axis.
    #[must_use]
    #[inline(always)]
    pub fn ortho_basis(self) -> [Self; 3] {
        let hi = f32xN::splat(0.6);
        let lo = f32xN::splat(-0.6);
        let one = f32xN::splat(1.0);

        // Per-lane "component is small" masks.
        let small_x = self.x.lt(hi) & self.x.gt(lo);
        let small_y = self.y.lt(hi) & self.y.gt(lo);
        let small_z = self.z.lt(hi) & self.z.gt(lo);

        // Mask form of the scalar if / else-if chain.
        let mut helper = Self::default();
        helper.x =
            (small_x | (!small_x & !small_y & !small_z)).select(one, helper.x);
        helper.y = (!small_x & small_y).select(one, helper.y);
        helper.z = (!small_x & !small_y & small_z).select(one, helper.z);

        let first = helper.cross(self).normalized();
        let second = self.cross(first).normalized();
        [first, second, self]
    }

    /// Lane-wise cross product of `self` and `o`.
    #[inline(always)]
    #[must_use]
    pub fn cross(self, o: Self) -> Self {
        let x = self.y * o.z - self.z * o.y;
        let y = self.z * o.x - self.x * o.z;
        let z = self.x * o.y - self.y * o.x;
        Self { x, y, z }
    }
}

impl Add for V3DxN {
    type Output = Self;
    /// Component-wise, lane-wise sum.
    #[inline(always)]
    fn add(self, o: Self) -> Self::Output {
        let (x, y, z) = (self.x + o.x, self.y + o.y, self.z + o.z);
        Self { x, y, z }
    }
}

impl Mul for V3DxN {
    type Output = Self;
    /// Component-wise, lane-wise product.
    #[inline(always)]
    fn mul(self, o: Self) -> Self::Output {
        let (x, y, z) = (self.x * o.x, self.y * o.y, self.z * o.z);
        Self { x, y, z }
    }
}

impl Mul<V3DxN> for f32xN {
    type Output = V3DxN;
    /// Scales each lane's vector in `o` by the matching lane of `self`.
    #[inline(always)]
    fn mul(self, o: V3DxN) -> Self::Output {
        let (x, y, z) = (self * o.x, self * o.y, self * o.z);
        V3DxN { x, y, z }
    }
}

impl Mul<V3DxN> for [V3DxN; 3] {
    type Output = V3DxN;
    /// Lane-wise `o.x * self[0] + o.y * self[1] + o.z * self[2]`, computed as
    /// one dot product per output component.
    #[inline(always)]
    fn mul(self, o: V3DxN) -> Self::Output {
        let [b0, b1, b2] = self;

        // Gather the x, y and z components of the three basis vectors.
        let xs = V3DxN {
            x: b0.x,
            y: b1.x,
            z: b2.x,
        };
        let ys = V3DxN {
            x: b0.y,
            y: b1.y,
            z: b2.y,
        };
        let zs = V3DxN {
            x: b0.z,
            y: b1.z,
            z: b2.z,
        };

        V3DxN {
            x: o.dot(xs),
            y: o.dot(ys),
            z: o.dot(zs),
        }
    }
}

impl Sub<V3D> for V3DxN {
    type Output = Self;
    /// Subtracts the scalar vector `o` from the vector in every lane.
    #[inline(always)]
    fn sub(self, o: V3D) -> Self::Output {
        let ox = f32xN::splat(o.x);
        let oy = f32xN::splat(o.y);
        let oz = f32xN::splat(o.z);
        Self {
            x: self.x - ox,
            y: self.y - oy,
            z: self.z - oz,
        }
    }
}

impl Dot<V3DxN> for V3DxN {
    type Output = f32xN;
    /// Lane-wise dot product, accumulated with `mul_adde` from z to x.
    #[inline(always)]
    fn dot(self, o: Self) -> Self::Output {
        let zz = self.z * o.z;
        let yy_zz = self.y.mul_adde(o.y, zz);
        self.x.mul_adde(o.x, yy_zz)
    }
}

impl Dot<V3D> for V3DxN {
    type Output = f32xN;
    /// Dot product of every lane's vector with the scalar vector `o`,
    /// accumulated with `mul_adde` from z to x.
    #[inline(always)]
    fn dot(self, o: V3D) -> Self::Output {
        // Broadcast all three components of `o` explicitly.
        let ox = f32xN::splat(o.x);
        let oy = f32xN::splat(o.y);
        let oz = f32xN::splat(o.z);

        let zz = self.z * oz;
        let yy_zz = self.y.mul_adde(oy, zz);
        self.x.mul_adde(ox, yy_zz)
    }
}

/// Selection between two values driven by `self`.
///
/// The implementations in this module are for the mask type `m32xN` and pick,
/// per lane, from `a` where the mask is set and from `b` otherwise.
pub trait Selectable<O, P> {
    /// Type of the selected result.
    type Output;
    /// Returns the combination of `a` and `b` chosen by `self`.
    fn sel(self, a: O, b: P) -> Self::Output;
}

impl Selectable<f32xN, f32xN> for m32xN {
    type Output = f32xN;
    /// Forwards to the mask's `select(a, b)`.
    #[inline(always)]
    fn sel(self, a: f32xN, b: f32xN) -> f32xN {
        Self::select(self, a, b)
    }
}

impl Selectable<V3DxN, V3DxN> for m32xN {
    type Output = V3DxN;
    /// Applies the mask's `select` to each component of `a` and `b`.
    #[inline(always)]
    fn sel(self, a: V3DxN, b: V3DxN) -> V3DxN {
        let x = self.select(a.x, b.x);
        let y = self.select(a.y, b.y);
        let z = self.select(a.z, b.z);
        V3DxN { x, y, z }
    }
}

impl Selectable<V3D, V3DxN> for m32xN {
    type Output = V3DxN;
    /// Broadcasts the scalar vector `a` to every lane, then applies the
    /// mask's `select` to each component against `b`.
    #[inline(always)]
    fn sel(self, a: V3D, b: V3DxN) -> V3DxN {
        let ax = f32xN::splat(a.x);
        let ay = f32xN::splat(a.y);
        let az = f32xN::splat(a.z);
        V3DxN {
            x: self.select(ax, b.x),
            y: self.select(ay, b.y),
            z: self.select(az, b.z),
        }
    }
}

impl Mul<V3DxN> for M3x3 {
    type Output = V3DxN;
    /// Multiplies the scalar matrix with the vector in every lane of `o`:
    /// each output component is
    /// `o.x * self[0].c + o.y * self[1].c + o.z * self[2].c`.
    #[inline(always)]
    fn mul(self, o: V3DxN) -> Self::Output {
        // One output component from the matching components `c0`, `c1`, `c2`
        // of the three matrix entries, accumulated with `mul_adde`.
        let combine = |c0: f32, c1: f32, c2: f32| {
            o.x.mul_adde(
                f32xN::splat(c0),
                o.y.mul_adde(f32xN::splat(c1), o.z * f32xN::splat(c2)),
            )
        };

        V3DxN {
            x: combine(self[0].x, self[1].x, self[2].x),
            y: combine(self[0].y, self[1].y, self[2].y),
            z: combine(self[0].z, self[1].z, self[2].z),
        }
    }
}


================================================
FILE: examples/aobench/src/image.rs
================================================
//! Image utilities

use failure::Error;
#[allow(unused)]
use png::{BitDepth, ColorType, Encoder};
use std::path::Path;

/// PNG image in RGB format
pub struct Image {
    /// Width in pixels.
    width: usize,
    /// Height in pixels.
    height: usize,
    /// 8-bit RGB bytes (`width * height * 3`), filled from `fdata` by
    /// `write_png`.
    data: Vec<u8>,
    /// Floating-point channel values (`width * height * 3`); `write_png`
    /// reads them either as planar `rrr...ggg...bbb...` or as interleaved
    /// `rgbrgb...` data.
    pub fdata: Vec<f32>,
}

impl Image {
    /// Creates a `width` x `height` image with every channel set to zero.
    pub fn new(width: usize, height: usize) -> Self {
        // Three channels per pixel: R, G and B.
        let channels = width * height * 3;
        Self {
            width,
            height,
            data: vec![0_u8; channels],
            fdata: vec![0_f32; channels],
        }
    }

    /// Image's `(width, height)`
    pub fn size(&self) -> (usize, usize) {
        (self.width, self.height)
    }

    /// Writes the pixels into a png image at `output`.
    ///
    /// `soa` specifies whether the values in `fdata` are in a Struct of Arrays
    /// (rrr...ggg...bbb...) or Array of Structs (rgbrgbrgb...) format.
    ///
    /// # Errors
    ///
    /// Returns an error if the file cannot be created or if the PNG header or
    /// image data cannot be written.
    pub fn write_png(
        &mut self,
        output: &Path,
        soa: bool,
    ) -> Result<(), Error> {
        /// Maps `x` to `x * 255.5` truncated to an integer and limited to the
        /// `0..=255` range.
        fn clamp(x: f32) -> u8 {
            let scaled = (x * 255.5) as isize;
            scaled.max(0).min(255) as u8
        }

        use std::fs::File;
        use std::io::BufWriter;

        let file = File::create(output)?;
        let buf_writer = &mut BufWriter::new(file);
        let mut encoder = Encoder::new(
            buf_writer,
            self.width as u32,
            self.height as u32,
        );

        encoder.set_color(ColorType::RGB);
        encoder.set_depth(BitDepth::Eight);
        // Propagate header errors to the caller instead of panicking; this is
        // the same error type that `write_image_data` propagates below.
        let mut writer = encoder.write_header()?;

        if soa {
            // Planar input: interleave the three channel planes.
            let len = self.width * self.height;
            let (r, tail) = self.fdata.split_at(len);
            let (g, b) = tail.split_at(len);
            assert!(r.len() == len);
            assert!(g.len() == len);
            assert!(b.len() == len);

            for i in 0..len {
                self.data[3 * i + 0] = clamp(r[i]);
                self.data[3 * i + 1] = clamp(g[i]);
                self.data[3 * i + 2] = clamp(b[i]);
            }
        } else {
            // Interleaved input: convert value by value.
            for (&fp, up) in self.fdata.iter().zip(self.data.iter_mut()) {
                (*up) = clamp(fp);
            }
        }

        writer.write_image_data(&self.data)?;
        Ok(())
    }
}


================================================
FILE: examples/aobench/src/intersection/mod.rs
================================================
//! Intersection functions

/// Intersection of `I` with `Self`
pub trait Intersect<I> {
    /// Intersection record that is threaded through successive tests.
    type Isect;
    /// Intersects `self` with `other`, starting from the record `isect`, and
    /// returns the resulting record. The implementations in this crate only
    /// overwrite the record when they find a hit with a positive `t` smaller
    /// than the one already stored.
    fn intersect(&self, other: &I, isect: Self::Isect) -> Self::Isect;
}

mod packet;
mod ray_plane;
mod ray_sphere;
mod single;

pub use self::packet::IsectxN;
pub use self::single::Isect;


================================================
FILE: examples/aobench/src/intersection/packet.rs
================================================
//! SIMD intersection result

use crate::geometry::{f32xN, m32xN, V3DxN};
use crate::intersection::Isect;

/// Intersection result for a packet of rays, one result per lane
#[derive(Copy, Clone, Debug)]
pub struct IsectxN {
    /// Ray parameter of the closest hit found so far (`1e17` by default).
    pub t: f32xN,
    /// Hit positions.
    pub p: V3DxN,
    /// Surface normals at the hit positions.
    pub n: V3DxN,
    /// Lanes for which a hit has been recorded.
    pub hit: m32xN,
}

impl Default for IsectxN {
    /// "No hit" in every lane: `t` is `1e17`, `p` and `n` are zero.
    #[inline]
    fn default() -> Self {
        let zero = V3DxN::default();
        Self {
            t: f32xN::splat(1e17),
            p: zero,
            n: zero,
            hit: m32xN::splat(false),
        }
    }
}

impl IsectxN {
    /// Extracts the intersection stored in lane `idx` as a scalar `Isect`.
    pub fn get(&self, idx: usize) -> Isect {
        let t = self.t.extract(idx);
        let p = self.p.get(idx);
        let n = self.n.get(idx);
        let hit = self.hit.extract(idx);
        Isect { t, p, n, hit }
    }
}


================================================
FILE: examples/aobench/src/intersection/ray_plane.rs
================================================
//! Intersection of a ray with a plane

use crate::geometry::{f32xN, Dot, Plane, Ray, RayxN, Selectable};
use crate::intersection::{Intersect, Isect, IsectxN};

// Scalar ray-plane intersection
impl Intersect<Plane> for Ray {
    type Isect = Isect;
    /// Returns `isect` updated with the plane hit when that hit lies in front
    /// of the ray (`t > 0`) and is closer than `isect.t`; otherwise returns
    /// `isect` unchanged.
    #[inline(always)]
    fn intersect(&self, plane: &Plane, mut isect: Isect) -> Isect {
        let normal = plane.n;
        let offset = -plane.p.dot(normal);
        let denom = self.dir.dot(normal);

        // The direction is (almost) perpendicular to the normal: no hit.
        if denom.abs() < 1e-17 {
            return isect;
        }

        let t = -(self.origin.dot(normal) + offset) / denom;

        let is_closer_hit = t > 0. && t < isect.t;
        if is_closer_hit {
            isect.t = t;
            isect.hit = true;
            isect.p = self.origin + t * self.dir;
            isect.n = normal;
        }

        isect
    }
}

// Vector ray-plane intersection for a packet of rays
impl Intersect<Plane> for RayxN {
    type Isect = IsectxN;
    /// Lane-wise version of the scalar ray-plane intersection: lanes with a
    /// closer hit in front of the ray are updated, all others keep their
    /// previous record.
    #[inline(always)]
    fn intersect(&self, plane: &Plane, mut isect: IsectxN) -> IsectxN {
        let normal = plane.n;
        let offset = -plane.p.dot(normal);
        let denom = self.dir.dot(normal);

        // Copy of the input record; only read by the debug check below.
        let _old_isect = isect;

        // Lanes whose direction is not (almost) perpendicular to the normal.
        let usable = denom.abs().ge(f32xN::splat(1e-17));
        if usable.any() {
            let t = usable
                .sel(-(self.origin.dot(normal) + offset) / denom, isect.t);
            let closer = usable & t.gt(f32xN::splat(0.)) & t.lt(isect.t);

            if closer.any() {
                isect.t = closer.sel(t, isect.t);
                isect.hit |= closer;
                isect.p = closer.sel(self.origin + t * self.dir, isect.p);
                isect.n = closer.sel(normal, isect.n);
            }
        }

        #[cfg(debug_assertions)]
        {
            // In debug builds, re-run every lane through the scalar code and
            // require (approximately) the same result.
            for i in 0..f32xN::lanes() {
                let old_isect_i = _old_isect.get(i);
                let ray_i = self.get(i);
                let isect_i = ray_i.intersect(plane, old_isect_i);
                assert!(isect_i.almost_eq(&isect.get(i)), "{:?} !~= {:?}\n\nplane: {:?}\n\nold_isect: {:?}\n\nrays: {:?}\n\ni: {:?}\nold_isect_i: {:?}\nray_i: {:?}\n\n", isect_i, isect.get(i), plane, _old_isect, self, i, old_isect_i, ray_i);
            }
        }

        isect
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::geometry::{m32xN, V3DxN, V3D};

    /// The packet intersection must agree, lane by lane, with the scalar
    /// intersection for alternating hitting and missing rays.
    #[test]
    fn sanity() {
        let plane = Plane {
            p: V3D {
                x: 0.,
                y: 0.,
                z: -10.,
            },
            n: V3D {
                x: 0.,
                y: 0.,
                z: 1.,
            },
        };

        let ray_hit = Ray {
            origin: V3D::default(),
            dir: V3D {
                x: 0.01,
                y: 0.01,
                z: -1.,
            },
        };
        let ray_miss = Ray {
            origin: V3D::default(),
            dir: V3D {
                x: 0.,
                y: 0.,
                z: 1.,
            },
        };

        let isect_hit = ray_hit.intersect(&plane, Isect::default());
        assert!(isect_hit.hit);
        let isect_miss = ray_miss.intersect(&plane, Isect::default());
        assert!(!isect_miss.hit);

        // hit, miss, hit, miss, ...

        #[cfg(feature = "256bit")]
        let z_val = f32xN::new(-1., 1., -1., 1., -1., 1., -1., 1.);
        #[cfg(not(feature = "256bit"))]
        let z_val = f32xN::new(-1., 1., -1., 1.);

        let rays = RayxN {
            origin: V3DxN::default(),
            dir: V3DxN {
                x: f32xN::splat(0.01),
                y: f32xN::splat(0.01),
                z: z_val,
            },
        };

        let isectxN = rays.intersect(&plane, IsectxN::default());

        #[cfg(feature = "256bit")]
        let expected =
            m32xN::new(true, false, true, false, true, false, true, false);
        #[cfg(not(feature = "256bit"))]
        let expected = m32xN::new(true, false, true, false);

        assert_eq!(isectxN.hit, expected);

        // Even lanes carry the hitting direction, odd lanes the missing one.
        // Every lane is checked, so the upper half of the vector is covered
        // as well when the `256bit` feature is enabled.
        for lane in 0..f32xN::lanes() {
            let want = if lane % 2 == 0 { isect_hit } else { isect_miss };
            let got = isectxN.get(lane);
            assert_eq!(want.hit, got.hit, "lane {}", lane);
            assert_eq!(want.t, got.t, "lane {}", lane);
            assert_eq!(want.p, got.p, "lane {}", lane);
            assert_eq!(want.n, got.n, "lane {}", lane);
        }
    }

    /// Regression test: a packet that already carries a hit and whose rays
    /// move away from the plane (negative `t`) must keep its hit flags.
    #[test]
    fn bug() {
        let plane = Plane {
            p: V3D {
                x: 0.,
                y: -0.5,
                z: 0.,
            },
            n: V3D {
                x: 0.,
                y: 1.,
                z: 0.,
            },
        };
        let isect = IsectxN {
            t: f32xN::splat(2.1931846),
            p: V3DxN {
                x: f32xN::splat(-0.2608384),
                y: f32xN::splat(-0.28958648),
                z: f32xN::splat(-2.6699374),
            },
            n: V3DxN {
                x: f32xN::splat(0.47832328),
                y: f32xN::splat(-0.579173),
                z: f32xN::splat(0.6601253),
            },
            hit: m32xN::splat(true),
        };
        let rays = RayxN {
            origin: V3DxN {
                x: f32xN::splat(-0.5),
                y: f32xN::splat(-0.4999),
                z: f32xN::splat(-0.5),
            },
            dir: V3DxN {
                x: f32xN::splat(0.10904764),
                y: f32xN::splat(0.095894136),
                z: f32xN::splat(-0.98940027),
            },
        };
        let r = rays.intersect(&plane, isect);
        assert_eq!(r.hit, m32xN::splat(true));
    }
}


================================================
FILE: examples/aobench/src/intersection/ray_sphere.rs
================================================
//! Intersection of a ray with a sphere.

use crate::geometry::{f32xN, Dot, Ray, RayxN, Selectable, Sphere};
use crate::intersection::{Intersect, Isect, IsectxN};

// Scalar ray-sphere intersection
impl Intersect<Sphere> for Ray {
    type Isect = Isect;
    /// Returns `isect` updated with the sphere hit at `t = -b - sqrt(d)` when
    /// that hit lies in front of the ray (`t > 0`) and is closer than
    /// `isect.t`; otherwise returns `isect` unchanged.
    #[inline(always)]
    fn intersect(&self, sphere: &Sphere, mut isect: Isect) -> Isect {
        // Vector from the sphere center to the ray origin.
        let to_origin = self.origin - sphere.center;

        let b = to_origin.dot(self.dir);
        let c = to_origin.dot(to_origin) - sphere.radius * sphere.radius;
        let discriminant = b * b - c;

        if discriminant > 0. {
            let t = -b - discriminant.sqrt();

            let is_closer_hit = t > 0. && t < isect.t;
            if is_closer_hit {
                let point = self.origin + t * self.dir;
                isect.t = t;
                isect.hit = true;
                isect.p = point;
                isect.n = (point - sphere.center).normalized();
            }
        }

        isect
    }
}

// Vector ray-sphere intersection for a packet of rays
impl Intersect<Sphere> for RayxN {
    type Isect = IsectxN;
    /// Lane-wise version of the scalar ray-sphere intersection: lanes with a
    /// closer hit in front of the ray are updated, all others keep their
    /// previous record.
    #[inline(always)]
    fn intersect(&self, sphere: &Sphere, mut isect: IsectxN) -> IsectxN {
        // Vectors from the sphere center to the ray origins.
        let to_origin = self.origin - sphere.center;

        let b = to_origin.dot(self.dir);
        let radius = f32xN::splat(sphere.radius);
        let c = radius.mul_adde(-radius, to_origin.dot(to_origin));
        let discriminant = b.mul_adde(b, -c);

        // Copy of the input record; only read by the debug check below.
        let _old_isect = isect;

        // Lanes with a positive discriminant.
        let has_roots = discriminant.gt(f32xN::splat(0.));
        if has_roots.any() {
            let t = has_roots.sel(-b - discriminant.sqrt(), isect.t);
            let closer = has_roots & t.gt(f32xN::splat(0.)) & t.lt(isect.t);

            if closer.any() {
                isect.t = closer.sel(t, isect.t);
                isect.hit |= closer;
                isect.p = closer.sel(self.origin + t * self.dir, isect.p);
                // Uses the freshly updated `isect.p`.
                isect.n = closer
                    .sel((isect.p - sphere.center).normalized(), isect.n);
            }
        }

        #[cfg(debug_assertions)]
        {
            // In debug builds, re-run every lane through the scalar code and
            // require (approximately) the same result.
            for i in 0..f32xN::lanes() {
                let old_isect_i = _old_isect.get(i);
                let ray_i = self.get(i);
                let isect_i = ray_i.intersect(sphere, old_isect_i);
                assert!(isect_i.almost_eq(&isect.get(i)), "{:?} !~= {:?}\n\nsphere: {:?}\n\nold_isect: {:?}\n\nrays: {:?}\n\ni: {:?}\nold_isect_i: {:?}\nray_i: {:?}\n\n", isect_i, isect.get(i), sphere, _old_isect, self, i, old_isect_i, ray_i);
            }
        }

        isect
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::geometry::{m32xN, V3DxN, V3D};

    /// The packet intersection must agree, lane by lane, with the scalar
    /// intersection for alternating hitting and missing rays.
    #[test]
    fn sanity() {
        let sphere = Sphere {
            center: V3D {
                x: 0.,
                y: 0.,
                z: -10.,
            },
            radius: 1.,
        };

        let ray_hit = Ray {
            origin: V3D::default(),
            dir: V3D {
                x: 0.01,
                y: 0.01,
                z: -1.,
            },
        };
        let ray_miss = Ray {
            origin: V3D::default(),
            dir: V3D {
                x: 0.,
                y: 0.,
                z: 1.,
            },
        };

        let isect_hit = ray_hit.intersect(&sphere, Isect::default());
        assert!(isect_hit.hit);
        let isect_miss = ray_miss.intersect(&sphere, Isect::default());
        assert!(!isect_miss.hit);

        // hit, miss, hit, miss, ...
        #[cfg(feature = "256bit")]
        let z_val = f32xN::new(-1., 1., -1., 1., -1., 1., -1., 1.);
        #[cfg(not(feature = "256bit"))]
        let z_val = f32xN::new(-1., 1., -1., 1.);

        let rays = RayxN {
            origin: V3DxN::default(),
            dir: V3DxN {
                x: f32xN::splat(0.01),
                y: f32xN::splat(0.01),
                z: z_val,
            },
        };

        let isectxN = rays.intersect(&sphere, IsectxN::default());

        #[cfg(feature = "256bit")]
        let expected =
            m32xN::new(true, false, true, false, true, false, true, false);
        #[cfg(not(feature = "256bit"))]
        let expected = m32xN::new(true, false, true, false);

        assert_eq!(isectxN.hit, expected);

        // Even lanes carry the hitting direction, odd lanes the missing one.
        // Every lane is checked, so the upper half of the vector is covered
        // as well when the `256bit` feature is enabled.
        for lane in 0..f32xN::lanes() {
            let want = if lane % 2 == 0 { isect_hit } else { isect_miss };
            let got = isectxN.get(lane);
            assert_eq!(want.hit, got.hit, "lane {}", lane);
            assert_eq!(want.t, got.t, "lane {}", lane);
            assert_eq!(want.p, got.p, "lane {}", lane);
            assert_eq!(want.n, got.n, "lane {}", lane);
        }
    }
}


================================================
FILE: examples/aobench/src/intersection/single.rs
================================================
//! Scalar intersection result

use crate::geometry::V3D;

/// Intersection result
#[derive(Copy, Clone, Debug)]
pub struct Isect {
    /// Ray parameter of the closest hit found so far (`1e17` by default).
    pub t: f32,
    /// Hit position.
    pub p: V3D,
    /// Surface normal at the hit position.
    pub n: V3D,
    /// Whether a hit has been recorded.
    pub hit: bool,
}

impl Default for Isect {
    /// "No hit": `t` is `1e17`, `p` and `n` are zero.
    #[inline]
    fn default() -> Self {
        let zero = V3D::default();
        Self {
            t: 1e17,
            p: zero,
            n: zero,
            hit: false,
        }
    }
}

impl Isect {
    /// Fuzzy comparison: `t`, `p` and `n` may differ by less than `1E-3` per
    /// value, while `hit` must match exactly.
    #[inline(always)]
    #[must_use]
    pub fn almost_eq(&self, rhs: &Self) -> bool {
        const EPSILON: f32 = 1E-3;
        let same_t = (self.t - rhs.t).abs() < EPSILON;
        same_t
            && self.p.almost_eq(&rhs.p)
            && self.n.almost_eq(&rhs.n)
            && self.hit == rhs.hit
    }
}


================================================
FILE: examples/aobench/src/ispc_.rs
================================================
//! Includes the ISPC implementations.
use crate::*;
use ispc::*;

ispc_module!(aobench);

/// Renders into `img.fdata` by calling the ISPC function `ao_ispc`.
///
/// `_scene` is ignored; only the image size, `nsubsamples` and the pixel
/// buffer are passed on.
pub fn ao<S: Scene>(
    _scene: &mut S,
    nsubsamples: usize,
    img: &mut crate::Image,
) {
    let (width, height) = img.size();
    let pixels = img.fdata.as_mut_ptr();
    // NOTE(review): presumably `ao_ispc` writes at most
    // `width * height * 3` floats through `pixels` — confirm against the
    // ISPC source.
    unsafe {
        self::aobench::ao_ispc(
            width as i32,
            height as i32,
            nsubsamples as i32,
            pixels,
        )
    }
}

/// Renders into `img.fdata` by calling the ISPC function `ao_ispc_tasks`.
///
/// `_scene` is ignored; only the image size, `nsubsamples` and the pixel
/// buffer are passed on.
pub fn ao_tasks<S: Scene>(
    _scene: &mut S,
    nsubsamples: usize,
    img: &mut crate::Image,
) {
    let (width, height) = img.size();
    let pixels = img.fdata.as_mut_ptr();
    // NOTE(review): presumably `ao_ispc_tasks` writes at most
    // `width * height * 3` floats through `pixels` — confirm against the
    // ISPC source.
    unsafe {
        self::aobench::ao_ispc_tasks(
            width as i32,
            height as i32,
            nsubsamples as i32,
            pixels,
        )
    }
}


================================================
FILE: examples/aobench/src/lib.rs
================================================
//! aobench: Ambient Occlusion Renderer benchmark.
//!
//! Based on [aobench](https://code.google.com/archive/p/aobench/) by Syoyo
//! Fujita.
// FIXME: Null pointer deref warning triggered in this example,
// likely inside a macro expansion deriving from packed_simd.
#![deny(rust_2018_idioms)]
#![allow(non_snake_case, non_camel_case_types)]
#![allow(
    clippy::many_single_char_names,
    clippy::similar_names,
    clippy::cast_precision_loss,
    clippy::inline_always,
    clippy::cast_possible_truncation,
    clippy::cast_sign_loss,
    clippy::identity_op,
    clippy::erasing_op,
    clippy::must_use_candidate,
    clippy::float_cmp
)]

pub mod ambient_occlusion;
pub mod geometry;
pub mod image;
pub mod intersection;
pub mod random;
pub mod scene;

#[cfg(feature = "ispc")]
pub mod ispc_;
pub mod scalar;
pub mod scalar_parallel;
pub mod tiled;
pub mod tiled_parallel;
pub mod vector;
pub mod vector_parallel;

pub use self::image::Image;
pub use self::scene::Scene;


================================================
FILE: examples/aobench/src/main.rs
================================================
//! aobench: Ambient Occlusion Renderer benchmark.
//!
//! Based on [aobench](https://code.google.com/archive/p/aobench/) by Syoyo
//! Fujita.
#![deny(rust_2018_idioms)]

use aobench_lib::*;
use std::path::PathBuf;
use structopt::StructOpt;

/// Command-line arguments.
// NOTE(review): with structopt's derive the `///` comments in this struct are
// presumably surfaced as `--help` text, so treat them as user-facing strings.
#[derive(StructOpt, Debug)]
struct Opt {
    /// Image width.
    // No `short`/`long` attribute on this field.
    width: usize,
    /// Image height.
    // No `short`/`long` attribute on this field.
    height: usize,

    /// Algorithm
    // Must be one of the names in `ALGORITHMS`; `main` panics otherwise.
    #[structopt(short = "a", long = "algo")]
    algo: String,

    /// Output file.
    // When absent, `main` writes to `image_<algorithm>.png`.
    #[structopt(short = "o", long = "output", parse(from_os_str))]
    output: Option<PathBuf>,
}

// Names accepted by `--algo`. Every entry needs a matching arm in the `match`
// inside `main`; a name without one would reach the `unreachable!()` arm.
const ALGORITHMS: &[&str] = &[
    "scalar",
    "scalar_par",
    "vector",
    "vector_par",
    "tiled",
    "tiled_par",
    "ispc",
    "ispc_tasks",
];

/// Entry point: parses the command line, renders the default random scene
/// with the selected algorithm (2 subsamples per axis), writes the result as
/// a PNG and prints the render time in milliseconds.
///
/// Only the render call is timed; writing the image happens afterwards.
///
/// # Panics
///
/// - if the requested algorithm is not listed in `ALGORITHMS`;
/// - if an ISPC algorithm is requested in a build without the `ispc` feature;
/// - if the image cannot be written.
fn main() {
    let opt = Opt::from_args();
    let mut scene = aobench_lib::scene::Random::default();
    let mut img = Image::new(opt.width, opt.height);

    let algorithm_name = opt.algo.as_str();

    if let Some(algorithm) = ALGORITHMS.iter().find(|&&a| a == algorithm_name)
    {
        let d = time::Duration::span(|| match *algorithm {
            "scalar" => scalar::ao(&mut scene, 2, &mut img),
            "scalar_par" => scalar_parallel::ao(&mut scene, 2, &mut img),
            "vector" => vector::ao(&mut scene, 2, &mut img),
            "vector_par" => vector_parallel::ao(&mut scene, 2, &mut img),
            "tiled" => tiled::ao(&mut scene, 2, &mut img),
            "tiled_par" => tiled_parallel::ao(&mut scene, 2, &mut img),
            "ispc" => {
                #[cfg(feature = "ispc")]
                {
                    ispc_::ao(&mut scene, 2, &mut img)
                }
                #[cfg(not(feature = "ispc"))]
                {
                    panic!("the `ispc` algorithm requires building with --features=ispc");
                }
            }
            "ispc_tasks" => {
                #[cfg(feature = "ispc")]
                {
                    ispc_::ao_tasks(&mut scene, 2, &mut img)
                }
                #[cfg(not(feature = "ispc"))]
                {
                    // The message names the algorithm exactly as the user
                    // typed it (`ispc_tasks`, not `ispc_task`).
                    panic!("the `ispc_tasks` algorithm requires building with --features=ispc");
                }
            }
            // `algorithm` was found in `ALGORITHMS`, and every entry of that
            // list has an arm above.
            _ => unreachable!(),
        });
        // Default output name is derived from the algorithm name.
        let image_path = opt.output.unwrap_or_else(|| {
            PathBuf::from(format!("image_{}.png", algorithm))
        });
        img.write_png(&image_path, false)
            .expect("failed to write image");

        println!("time: {} ms", d.num_milliseconds());
    } else {
        // Unknown name: list every valid choice in the panic message.
        let mut error = format!(
            "unknown algorithm: \"{}\"\nAvailable algorithms:",
            algorithm_name
        );
        for a in ALGORITHMS {
            error.push_str(&format!("\n- {}", a));
        }
        panic!("{}", error);
    }
}


================================================
FILE: examples/aobench/src/random.rs
================================================
//! Pseudo random number generators.
//!
//! Currently only `LFSR113` is implemented, since that is what ISPC uses, and it
//! allows us to compare Rust's codegen with that of ISPC for the same
//! algorithms.
//!
//! Use `{scalar,vector}::thread_rng()` to get a handle to the thread-local
//! random number generator, and call `.gen()` to generate an `f32` or an
//! `f32xN`.

/// Scalar pseudo random number generator
pub mod scalar {
    use std::cell::UnsafeCell;
    use std::rc::Rc;

    // Note: This implementation could be vectorized using an `u32x4`.
    /// The four 32-bit state words of the generator.
    struct RngT(u32, u32, u32, u32);

    impl RngT {
        /// Expands one seed into the four state words: the seed itself, the
        /// seed xor `0xbeef_f00d`, the seed with its 16-bit halves swapped,
        /// and the seed with its byte order reversed.
        fn from_seed(x: u32) -> Self {
            // ((x & 0xffff) << 16) | (x >> 16)
            let halves_swapped = x.rotate_left(16);
            // Bytes 0,1,2,3 moved to positions 3,2,1,0.
            let bytes_reversed = x.swap_bytes();
            Self(x, x ^ 0xbeef_f00d, halves_swapped, bytes_reversed)
        }

        /// Advances all four state words by one step and returns their xor.
        pub fn gen_u32(&mut self) -> u32 {
            let Self(z0, z1, z2, z3) = self;

            let b0 = ((*z0 << 6) ^ *z0) >> 13;
            *z0 = ((*z0 & 0xFFFF_FFFE_u32) << 18) ^ b0;

            let b1 = ((*z1 << 2) ^ *z1) >> 27;
            *z1 = ((*z1 & 0xFFFF_FFF8_u32) << 2) ^ b1;

            let b2 = ((*z2 << 13) ^ *z2) >> 21;
            *z2 = ((*z2 & 0xFFFF_FFF0_u32) << 7) ^ b2;

            let b3 = ((*z3 << 3) ^ *z3) >> 12;
            *z3 = ((*z3 & 0xFFFF_FF80_u32) << 13) ^ b3;

            *z0 ^ *z1 ^ *z2 ^ *z3
        }

        /// Returns the next `f32` in `[0, 1)`.
        ///
        /// The low 23 random bits become the mantissa of a float whose
        /// exponent bits are those of `1.0`, giving a value in `[1, 2)`;
        /// subtracting one shifts it to `[0, 1)`.
        pub fn gen(&mut self) -> f32 {
            const MANTISSA_MASK: u32 = (1_u32 << 23) - 1;
            const ONE_BITS: u32 = 0x3F80_0000;
            let mantissa = self.gen_u32() & MANTISSA_MASK;
            f32::from_bits(ONE_BITS | mantissa) - 1.
        }
    }

    /// Handle to the thread-local generator; clones share the same state.
    #[derive(Clone)]
    pub struct RngH {
        rng: Rc<UnsafeCell<RngT>>,
    }

    impl RngH {
        /// Draws the next `f32` from the shared generator state.
        pub fn gen(&mut self) -> f32 {
            let state = self.rng.get();
            unsafe { (*state).gen() }
        }
    }

    thread_local!(
        static THREAD_RNG_KEY: Rc<UnsafeCell<RngT>> = {
            Rc::new(UnsafeCell::new(RngT::from_seed(1)))
        }
    );

    /// Returns a handle to this thread's generator, which is seeded with `1`
    /// on first use.
    pub fn thread_rng() -> RngH {
        let rng = THREAD_RNG_KEY.with(Rc::clone);
        RngH { rng }
    }
}

/// Vector pseudo random number generator
pub mod vector {
    use crate::geometry::{f32xN, u32xN, IncrV};
    use std::cell::UnsafeCell;
    use std::rc::Rc;

    /// Four state vectors; each lane is an independent copy of the scalar
    /// generator's state.
    struct RngT(u32xN, u32xN, u32xN, u32xN);

    impl RngT {
        /// Lane-wise version of the scalar seeding: the seed, the seed xor
        /// `0xbeef_f00d`, the seed with its 16-bit halves swapped, and the
        /// seed with its byte order reversed.
        fn from_seed(x: u32xN) -> Self {
            let low_half = x & u32xN::splat(0xffff);
            let halves_swapped = (low_half << 16) | (x >> 16);

            let byte0 = x & u32xN::splat(0xff);
            let byte1 = x & u32xN::splat(0xff00);
            let byte2 = x & u32xN::splat(0x00ff_0000);
            let byte3 = x & u32xN::splat(0xff00_0000);
            let bytes_reversed =
                (byte0 << 24) | (byte1 << 8) | (byte2 >> 8) | (byte3 >> 24);

            Self(
                x,
                x ^ u32xN::splat(0xbeef_f00d),
                halves_swapped,
                bytes_reversed,
            )
        }

        /// Advances all four state vectors by one step and returns their xor.
        #[inline(always)]
        pub fn gen_u32(&mut self) -> u32xN {
            let Self(z0, z1, z2, z3) = self;

            let b0 = ((*z0 << 6) ^ *z0) >> 13;
            *z0 = ((*z0 & u32xN::splat(0xFFFF_FFFE)) << 18) ^ b0;

            let b1 = ((*z1 << 2) ^ *z1) >> 27;
            *z1 = ((*z1 & u32xN::splat(0xFFFF_FFF8)) << 2) ^ b1;

            let b2 = ((*z2 << 13) ^ *z2) >> 21;
            *z2 = ((*z2 & u32xN::splat(0xFFFF_FFF0)) << 7) ^ b2;

            let b3 = ((*z3 << 3) ^ *z3) >> 12;
            *z3 = ((*z3 & u32xN::splat(0xFFFF_FF80)) << 13) ^ b3;

            *z0 ^ *z1 ^ *z2 ^ *z3
        }

        /// Returns the next vector of `f32` values, each in `[0, 1)`.
        ///
        /// Per lane, the low 23 random bits are combined with the bit
        /// pattern of `1.0` and the result is reinterpreted as a float in
        /// `[1, 2)`, from which one is subtracted.
        #[inline(always)]
        pub fn gen(&mut self) -> f32xN {
            let mantissa = self.gen_u32() & u32xN::splat((1_u32 << 23) - 1);
            let bits = u32xN::splat(0x3F80_0000) | mantissa;
            // Bit-for-bit reinterpretation of the integer lanes as floats.
            let in_one_two: f32xN = unsafe { std::mem::transmute(bits) };
            in_one_two - f32xN::splat(1.)
        }
    }

    /// Handle to the thread-local generator; clones share the same state.
    #[derive(Clone)]
    pub struct RngH {
        rng: Rc<UnsafeCell<RngT>>,
    }

    impl RngH {
        /// Draws the next vector from the shared generator state.
        #[inline(always)]
        pub fn gen(&mut self) -> f32xN {
            let state = self.rng.get();
            unsafe { (*state).gen() }
        }
    }

    thread_local!(
        static THREAD_RNG_KEY: Rc<UnsafeCell<RngT>> = {
            Rc::new(UnsafeCell::new(RngT::from_seed(<u32xN as IncrV>::incr(0, 1))))
        }
    );

    /// Returns a handle to this thread's generator.
    ///
    /// NOTE(review): the seed vector is `IncrV::incr(0, 1)`, presumably
    /// `0, 1, 2, ...` across the lanes — confirm against `IncrV`.
    pub fn thread_rng() -> RngH {
        let rng = THREAD_RNG_KEY.with(Rc::clone);
        RngH { rng }
    }
}


================================================
FILE: examples/aobench/src/scalar.rs
================================================
//! Scalar serial aobench

use crate::ambient_occlusion;
use crate::geometry::{Ray, V3D};
use crate::intersection::{Intersect, Isect};
use crate::scene::Scene;

/// Renders the scene into `img` one pixel and one ray at a time.
///
/// Each pixel is sampled on an `nsubsamples` x `nsubsamples` grid. The
/// ambient-occlusion value of every sample is added to the pixel's three
/// consecutive channels, and the sum is then divided by the number of
/// samples. Values are accumulated onto whatever `img.fdata` already holds.
pub fn ao<S: Scene>(
    scene: &mut S,
    nsubsamples: usize,
    img: &mut crate::Image,
) {
    let (w, h) = img.size();
    let image = &mut img.fdata;
    // Float copies of the loop-invariant sizes used in the ray set-up.
    let (wf, hf, nsf) = (w as f32, h as f32, nsubsamples as f32);

    for row in 0..h {
        let yf = row as f32;
        for col in 0..w {
            let xf = col as f32;
            // Three consecutive floats per pixel, rows stored back to back.
            let offset = 3 * (row * w + col);

            for u in 0..nsubsamples {
                let uf = u as f32;
                for v in 0..nsubsamples {
                    let vf = v as f32;

                    // Primary ray through this subsample; the origin is the
                    // default vector.
                    let dir = V3D {
                        x: (xf + uf / nsf - wf / 2.) / (wf / 2.) * wf / hf,
                        y: -(yf + vf / nsf - hf / 2.) / (hf / 2.),
                        z: -1.,
                    }
                    .normalized();
                    let ray = Ray {
                        origin: V3D::default(),
                        dir,
                    };

                    // Intersect every sphere first, then the plane.
                    let mut isect = Isect::default();
                    for sphere in scene.spheres() {
                        isect = ray.intersect(sphere, isect);
                    }
                    isect = ray.intersect(scene.plane(), isect);

                    let ret = if isect.hit {
                        ambient_occlusion::scalar(scene, &isect)
                    } else {
                        0.
                    };

                    // Update image for AO for this ray
                    for channel in &mut image[offset..offset + 3] {
                        *channel += ret;
                    }
                }
            }

            // Normalize image pixels by number of samples taken per pixel
            let samples = (nsubsamples * nsubsamples) as f32;
            for channel in &mut image[offset..offset + 3] {
                *channel /= samples;
            }
        }
    }
}


================================================
FILE: examples/aobench/src/scalar_parallel.rs
================================================
//! Scalar parallel aobench

use crate::ambient_occlusion;
use crate::geometry::{Ray, V3D};
use crate::intersection::{Intersect, Isect};
use crate::scene::Scene;
use rayon::prelude::*;

/// Renders the scene into `img` one ray at a time, with image rows
/// distributed over rayon's thread pool.
///
/// The scene argument is ignored: every row builds its own `S::default()`.
/// Each pixel is sampled on an `nsubsamples` x `nsubsamples` grid, the
/// samples are summed into the pixel's three channels and the sum is divided
/// by the number of samples.
pub fn ao<S: Scene>(_: &mut S, nsubsamples: usize, img: &mut crate::Image) {
    let (w, h) = img.size();
    // Float copies of the loop-invariant sizes used in the ray set-up.
    let (wf, hf, nsf) = (w as f32, h as f32, nsubsamples as f32);

    // One chunk of `3 * w` floats is exactly one image row.
    let rows = img.fdata.par_chunks_mut(3 * w).enumerate();
    rows.for_each(|(y, image)| {
        assert!(image.len() == 3 * w);
        let mut scene = S::default();
        let yf = y as f32;

        for col in 0..w {
            let xf = col as f32;
            // Offset of this pixel inside the row chunk.
            let offset = 3 * col;

            for u in 0..nsubsamples {
                let uf = u as f32;
                for v in 0..nsubsamples {
                    let vf = v as f32;

                    // Primary ray through this subsample.
                    let dir = V3D {
                        x: (xf + uf / nsf - wf / 2.) / (wf / 2.) * wf / hf,
                        y: -(yf + vf / nsf - hf / 2.) / (hf / 2.),
                        z: -1.,
                    }
                    .normalized();
                    let ray = Ray {
                        origin: V3D::default(),
                        dir,
                    };

                    // Intersect every sphere first, then the plane.
                    let mut isect = Isect::default();
                    for sphere in scene.spheres() {
                        isect = ray.intersect(sphere, isect);
                    }
                    isect = ray.intersect(scene.plane(), isect);

                    let ret = if isect.hit {
                        ambient_occlusion::scalar(&mut scene, &isect)
                    } else {
                        0.
                    };

                    // Update image for AO for this ray
                    for channel in &mut image[offset..offset + 3] {
                        *channel += ret;
                    }
                }
            }

            // Normalize image pixels by number of samples taken per pixel
            let samples = (nsubsamples * nsubsamples) as f32;
            for channel in &mut image[offset..offset + 3] {
                *channel /= samples;
            }
        }
    });
}


================================================
FILE: examples/aobench/src/scene/mod.rs
================================================
//! Scene interface
use crate::geometry::{f32xN, Plane, Sphere};

/// What the renderers need from a scene: its geometry and a source of random
/// numbers.
pub trait Scene: Send + Sync + Default {
    /// Sample-count constant supplied by each scene.
    const NAO_SAMPLES: usize;
    /// Returns the next scalar random number.
    fn rand(&mut self) -> f32;
    /// The scene's plane.
    fn plane(&self) -> &Plane;
    /// The scene's spheres.
    fn spheres(&self) -> &[Sphere];
    /// Returns two vectors of random numbers.
    ///
    /// The default implementation draws two scalars per lane from
    /// [`rand`](Scene::rand) in sequence; even-numbered draws fill the first
    /// vector and odd-numbered draws fill the second.
    fn rand_f32xN(&mut self) -> (f32xN, f32xN) {
        #[cfg(feature = "256bit")]
        {
            // Eight lanes per vector: sixteen draws in total.
            let mut r = [0.0_f32; 16];
            for slot in r.iter_mut() {
                *slot = self.rand();
            }
            let evens =
                f32xN::new(r[0], r[2], r[4], r[6], r[8], r[10], r[12], r[14]);
            let odds =
                f32xN::new(r[1], r[3], r[5], r[7], r[9], r[11], r[13], r[15]);
            (evens, odds)
        }
        #[cfg(not(feature = "256bit"))]
        {
            // Four lanes per vector: eight draws in total.
            let mut r = [0.0_f32; 8];
            for slot in r.iter_mut() {
                *slot = self.rand();
            }
            let evens = f32xN::new(r[0], r[2], r[4], r[6]);
            let odds = f32xN::new(r[1], r[3], r[5], r[7]);
            (evens, odds)
        }
    }
}

mod random;
pub use self::random::Random;

mod test;
pub use self::test::Test;


================================================
FILE: examples/aobench/src/scene/random.rs
================================================
//! Aobench scene: 3 spheres and a plane using a random number generator

use crate::geometry::{f32xN, Plane, Sphere, V3D};
use crate::scene::Scene;

/// Scene whose random numbers come from the thread-local generators in
/// `crate::random`.
#[derive(Clone)]
pub struct Random {
    /// The scene's plane.
    pub plane: Plane,
    /// The three spheres of the scene.
    pub spheres: [Sphere; 3],
}

impl Default for Random {
    fn default() -> Self {
        let plane = Plane {
            p: V3D {
                x: 0.,
                y: -0.5,
                z: 0.,
            },
            n: V3D {
                x: 0.,
                y: 1.,
                z: 0.,
            },
        };
        let spheres = [
            Sphere {
                center: V3D {
                    x: -2.,
                    y: 0.,
                    z: -3.5,
                },
                radius: 0.5,
            },
            Sphere {
                center: V3D {
                    x: -0.5,
                    y: 0.,
                    z: -3.,
                },
                radius: 0.5,
            },
            Sphere {
                center: V3D {
                    x: 1.,
                    y: 0.,
                    z: -2.2,
                },
                radius: 0.5,
            },
        ];
        Self { plane, spheres }
    }
}

impl Scene for Random {
    const NAO_SAMPLES: usize = 8;

    /// Next value from the thread-local scalar generator.
    #[inline(always)]
    fn rand(&mut self) -> f32 {
        let mut rng = crate::random::scalar::thread_rng();
        rng.gen()
    }

    #[inline(always)]
    fn plane(&self) -> &Plane {
        &self.plane
    }

    #[inline(always)]
    fn spheres(&self) -> &[Sphere] {
        &self.spheres
    }

    /// Two consecutive draws from the thread-local vector generator, used
    /// instead of the trait's scalar-based default.
    #[inline(always)]
    fn rand_f32xN(&mut self) -> (f32xN, f32xN) {
        let mut rng = crate::random::vector::thread_rng();
        let first = rng.gen();
        let second = rng.gen();
        (first, second)
    }
}


================================================
FILE: examples/aobench/src/scene/test.rs
================================================
//! Aobench scene: 3 spheres and a plane, replaying a precomputed table of
//! random numbers

use crate::geometry::{Plane, Sphere, V3D};
use crate::scene::Scene;
use std::num::Wrapping;

/// Scene that replays a fixed table of random numbers instead of drawing
/// fresh ones on every call.
#[derive(Clone)]
pub struct Test {
    /// The scene's plane.
    pub plane: Plane,
    /// The three spheres of the scene.
    pub spheres: [Sphere; 3],
    // Table of `2 * NAO_SAMPLES * NAO_SAMPLES` values, filled once in
    // `Default::default`.
    rands: Vec<f32>,
    // Index into `rands` of the value the next `rand()` call returns.
    rand_step: Wrapping<usize>,
}

impl Default for Test {
    fn default() -> Self {
        let plane = Plane {
            p: V3D {
                x: 0.,
                y: -0.5,
                z: 0.,
            },
            n: V3D {
                x: 0.,
                y: 1.,
                z: 0.,
            },
        };
        let spheres = [
            Sphere {
                center: V3D {
                    x: -2.,
                    y: 0.,
                    z: -3.5,
                },
                radius: 0.5,
            },
            Sphere {
                center: V3D {
                    x: -0.5,
                    y: 0.,
                    z: -3.,
                },
                radius: 0.5,
            },
            Sphere {
                center: V3D {
                    x: 1.,
                    y: 0.,
                    z: -2.2,
                },
                radius: 0.5,
            },
        ];
        let mut rands = Vec::new();
        let mut rng = crate::random::scalar::thread_rng();
        for _ in 0..2 * Self::NAO_SAMPLES * Self::NAO_SAMPLES {
            rands.push(rng.gen());
        }
        let rand_step = Wrapping(0);
        Self {
            plane,
            spheres,
            rands,
            rand_step,
        }
    }
}

impl Scene for Test {
    const NAO_SAMPLES: usize = 8;
    fn rand(&mut self) -> f32 {
        let v = self.rands[self.rand_step.0];
        self.rand_step += Wrapping(1);
        if self.rand_step
            >= Wrapping(2 * Self::NAO_SAMPLES * Self::NAO_SAMPLES)
        {
            self.rand_step = Wrapping(0);
        }
        v
    }
    fn plane(&self) -> &Plane {
        &self.plane
    }
    fn spheres(&self) -> &[Sphere] {
        &self.spheres
    }
}


================================================
FILE: examples/aobench/src/tiled.rs
================================================
//! SIMD tiled serial aobench

use crate::ambient_occlusion;
use crate::geometry::{f32xN, pf32xN, usizexN, IncrV, RayxN, V3DxN};
use crate::intersection::{Intersect, IsectxN};
use crate::scene::Scene;
use cfg_if::cfg_if;

#[inline(always)]
fn ao_impl<S: Scene>(
    scene: &mut S,
    nsubsamples: usize,
    img: &mut crate::Image,
) {
    let (w, h) = img.size();
    assert_eq!(w % f32xN::lanes(), 0);
    let image = &mut img.fdata;
    let ns = nsubsamples;
    let inv_ns = 1. / (ns as f32);
    let ptr = pf32xN::splat(image.as_mut_ptr());
    for y in 0..h {
        let yf = f32xN::splat(y as f32);
        for x in (0..w).step_by(f32xN::lanes()) {
            let xf = f32xN::incr(x as f32, 1.);
            let offset = usizexN::splat(3 * (y * w + x));
            let r_ptr = unsafe { ptr.add(offset + usizexN::incr(0, 3)) };
            let g_ptr = unsafe { ptr.add(offset + usizexN::incr(1, 3)) };
            let b_ptr = unsafe { ptr.add(offset + usizexN::incr(2, 3)) };

            for u in 0..ns {
                for v in 0..ns {
                    let du = (u as f32) * inv_ns;
                    let dv = (v as f32) * inv_ns;
                    let (hf, wf) = (h as f32, w as f32);

                    let dir = V3DxN {
                        x: (xf + f32xN::splat(du - (wf / 2.)))
                            / f32xN::splat((wf / 2.) * hf / wf),
                        y: -(yf + f32xN::splat(dv - (hf / 2.)))
                            / f32xN::splat(hf / 2.),
                        z: f32xN::splat(-1.),
                    };
                    let dir = dir.normalized();

                    let ray = RayxN {
                        origin: V3DxN::default(),
                        dir,
                    };

                    let mut isect = IsectxN::default();
                    for s in scene.spheres() {
                        isect = ray.intersect(s, isect);
                    }
                    isect = ray.intersect(scene.plane(), isect);

                    if isect.hit.any() {
                        let ret =
                            ambient_occlusion::vector_tiled(scene, &isect)
                                * f32xN::splat(inv_ns * inv_ns);

                        unsafe {
                            let img_r =
                                r_ptr.read(isect.hit, f32xN::splat(0.));
                            let img_g =
                                g_ptr.read(isect.hit, f32xN::splat(0.));
                            let img_b =
                                b_ptr.read(isect.hit, f32xN::splat(0.));

                            r_ptr.write(isect.hit, img_r + ret);
                            g_ptr.write(isect.hit, img_g + ret);
                            b_ptr.write(isect.hit, img_b + ret);
                        }
                    }
                }
            }
        }
    }
}

cfg_if! {
    if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] {
        // One wrapper per instruction-set level. `ao_impl` is
        // `#[inline(always)]`, so each wrapper carries its own copy of the
        // kernel under the listed target features.

        #[target_feature(enable = "sse4.2")]
        unsafe fn ao_sse42<S: Scene>(
            scene: &mut S,
            nsubsamples: usize,
            img: &mut crate::Image,
        ) {
            ao_impl(scene, nsubsamples, img);
        }

        #[target_feature(enable = "avx")]
        unsafe fn ao_avx<S: Scene>(
            scene: &mut S,
            nsubsamples: usize,
            img: &mut crate::Image,
        ) {
            ao_impl(scene, nsubsamples, img);
        }

        #[target_feature(enable = "avx,fma")]
        unsafe fn ao_avx_fma<S: Scene>(
            scene: &mut S,
            nsubsamples: usize,
            img: &mut crate::Image,
        ) {
            ao_impl(scene, nsubsamples, img);
        }

        #[target_feature(enable = "avx2,fma")]
        unsafe fn ao_avx2<S: Scene>(
            scene: &mut S,
            nsubsamples: usize,
            img: &mut crate::Image,
        ) {
            ao_impl(scene, nsubsamples, img);
        }

        /// Renders with the most capable kernel the running CPU supports,
        /// tried in the order AVX2+FMA, AVX+FMA, AVX, SSE4.2, baseline.
        pub fn ao<S: Scene>(
            scene: &mut S,
            nsubsamples: usize,
            img: &mut crate::Image,
        ) {
            let has_fma = is_x86_feature_detected!("fma");
            let has_avx2 = is_x86_feature_detected!("avx2");
            let has_avx = is_x86_feature_detected!("avx");
            let has_sse42 = is_x86_feature_detected!("sse4.2");

            // Each wrapper is only called after its features were detected.
            unsafe {
                if has_avx2 && has_fma {
                    ao_avx2(scene, nsubsamples, img);
                } else if has_avx && has_fma {
                    ao_avx_fma(scene, nsubsamples, img);
                } else if has_avx {
                    ao_avx(scene, nsubsamples, img);
                } else if has_sse42 {
                    ao_sse42(scene, nsubsamples, img);
                } else {
                    ao_impl(scene, nsubsamples, img);
                }
            }
        }
    } else {
        /// Renders with the baseline kernel; no run-time dispatch is done on
        /// this architecture.
        pub fn ao<S: Scene>(
            scene: &mut S,
            nsubsamples: usize,
            img: &mut crate::Image,
        ) {
            ao_impl(scene, nsubsamples, img);
        }
    }
}


================================================
FILE: examples/aobench/src/tiled_parallel.rs
================================================
//! SIMD tiled parallel aobench

use crate::ambient_occlusion;
use crate::geometry::{f32xN, pf32xN, usizexN, IncrV, RayxN, V3DxN};
use crate::intersection::{Intersect, IsectxN};
use crate::scene::Scene;
use rayon::prelude::*;

pub fn ao<S: Scene>(_: &mut S, nsubsamples: usize, img: &mut crate::Image) {
    let (w, h) = img.size();
    assert_eq!(w % f32xN::lanes(), 0);
    let ns = nsubsamples;
    let inv_ns = 1. / (ns as f32);
    let ptr = usizexN::splat(img.fdata.as_mut_ptr() as usize);
    img.fdata
        .par_chunks_mut(3 * w)
        .enumerate()
        .for_each(|(y, image)| {
            assert!(image.len() == 3 * w);
            let mut scene = S::default();
            let yf = f32xN::splat(y as f32);
            let ptr: pf32xN = unsafe { std::mem::transmute(ptr) };
            for x in (0..w).step_by(f32xN::lanes()) {
                let xf = f32xN::incr(x as f32, 1.);
                let offset = usizexN::splat(3 * (y * w + x));
                let r_ptr = unsafe { ptr.add(offset + usizexN::incr(0, 3)) };
                let g_ptr = unsafe { ptr.add(offset + usizexN::incr(1, 3)) };
                let b_ptr = unsafe { ptr.add(offset + usizexN::incr(2, 3)) };

                for u in 0..ns {
                    for v in 0..ns {
                        let du = (u as f32) * inv_ns;
                        let dv = (v as f32) * inv_ns;
                        let (hf, wf) = (h as f32, w as f32);

                        let dir = V3DxN {
                            x: (xf + f32xN::splat(du - (wf / 2.)))
                                / f32xN::splat((wf / 2.) * hf / wf),
                            y: -(yf + f32xN::splat(dv - (hf / 2.)))
                                / f32xN::splat(hf / 2.),
                            z: f32xN::splat(-1.),
                        };
                        let dir = dir.normalized();

                        let ray = RayxN {
                            origin: V3DxN::default(),
                            dir,
                        };

                        let mut isect = IsectxN::default();
                        for s in scene.spheres() {
                            isect = ray.intersect(s, isect);
                        }
                        isect = ray.intersect(scene.plane(), isect);

                        if isect.hit.any() {
                            let ret = ambient_occlusion::vector_tiled(
                                &mut scene, &isect,
                            ) * f32xN::splat(inv_ns * inv_ns);

                            unsafe {
                                let img_r =
                                    r_ptr.read(isect.hit, f32xN::splat(0.));
                                let img_g =
                                    g_ptr.read(isect.hit, f32xN::splat(0.));
                                let img_b =
                                    b_ptr.read(isect.hit, f32xN::splat(0.));

                                r_ptr.write(isect.hit, img_r + ret);
                                g_ptr.write(isect.hit, img_g + ret);
                                b_ptr.write(isect.hit, img_b + ret);
                            }
                        }
                    }
                }
            }
        });
}


================================================
FILE: examples/aobench/src/vector.rs
================================================
//! SIMD serial aobench

use crate::ambient_occlusion;
use crate::geometry::{Ray, V3D};
use crate::intersection::{Intersect, Isect};
use crate::scene::Scene;
use cfg_if::cfg_if;

/// Renders the scene into `img` one ray at a time, using
/// `ambient_occlusion::vector` for each hit.
///
/// Each sample's contribution is scaled by `1 / nsubsamples^2` before it is
/// added to the pixel's three consecutive channels, so no separate
/// normalisation pass is needed.
#[inline(always)]
fn ao_impl<S: Scene>(
    scene: &mut S,
    nsubsamples: usize,
    img: &mut crate::Image,
) {
    let (w, h) = img.size();
    let image = &mut img.fdata;
    let inv_ns = 1. / (nsubsamples as f32);
    // Float copies of the loop-invariant image sizes.
    let (hf, wf) = (h as f32, w as f32);

    for row in 0..h {
        let yf = row as f32;
        for col in 0..w {
            let xf = col as f32;
            // Three consecutive floats per pixel, rows stored back to back.
            let offset = 3 * (row * w + col);

            for u in 0..nsubsamples {
                let du = (u as f32) * inv_ns;
                for v in 0..nsubsamples {
                    let dv = (v as f32) * inv_ns;

                    // Primary ray through this subsample.
                    let dir = V3D {
                        x: (xf + du - (wf * 0.5)) / (wf * 0.5) * wf / hf,
                        y: -(yf + dv - (hf * 0.5)) / (hf * 0.5),
                        z: -1.,
                    }
                    .normalized();
                    let ray = Ray {
                        origin: V3D::default(),
                        dir,
                    };

                    // Intersect every sphere first, then the plane.
                    let mut isect = Isect::default();
                    for sphere in scene.spheres() {
                        isect = ray.intersect(sphere, isect);
                    }
                    isect = ray.intersect(scene.plane(), isect);

                    let occlusion = if isect.hit {
                        ambient_occlusion::vector(scene, &isect)
                    } else {
                        0.
                    };
                    let ret = occlusion * inv_ns * inv_ns;

                    // Update image for AO for this ray
                    // (already normalized)
                    for channel in &mut image[offset..offset + 3] {
                        *channel += ret;
                    }
                }
            }
        }
    }
}

cfg_if! {
    if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] {
        // One wrapper per instruction-set level. `ao_impl` is
        // `#[inline(always)]`, so each wrapper carries its own copy of the
        // kernel under the listed target features.

        #[target_feature(enable = "sse4.2")]
        unsafe fn ao_sse42<S: Scene>(
            scene: &mut S,
            nsubsamples: usize,
            img: &mut crate::Image,
        ) {
            ao_impl(scene, nsubsamples, img);
        }

        #[target_feature(enable = "avx")]
        unsafe fn ao_avx<S: Scene>(
            scene: &mut S,
            nsubsamples: usize,
            img: &mut crate::Image,
        ) {
            ao_impl(scene, nsubsamples, img);
        }

        #[target_feature(enable = "avx,fma")]
        unsafe fn ao_avx_fma<S: Scene>(
            scene: &mut S,
            nsubsamples: usize,
            img: &mut crate::Image,
        ) {
            ao_impl(scene, nsubsamples, img);
        }

        #[target_feature(enable = "avx2,fma")]
        unsafe fn ao_avx2<S: Scene>(
            scene: &mut S,
            nsubsamples: usize,
            img: &mut crate::Image,
        ) {
            ao_impl(scene, nsubsamples, img);
        }

        /// Renders with the most capable kernel the running CPU supports,
        /// tried in the order AVX2+FMA, AVX+FMA, AVX, SSE4.2, baseline.
        pub fn ao<S: Scene>(
            scene: &mut S,
            nsubsamples: usize,
            img: &mut crate::Image,
        ) {
            let has_fma = is_x86_feature_detected!("fma");
            let has_avx2 = is_x86_feature_detected!("avx2");
            let has_avx = is_x86_feature_detected!("avx");
            let has_sse42 = is_x86_feature_detected!("sse4.2");

            // Each wrapper is only called after its features were detected.
            unsafe {
                if has_avx2 && has_fma {
                    ao_avx2(scene, nsubsamples, img);
                } else if has_avx && has_fma {
                    ao_avx_fma(scene, nsubsamples, img);
                } else if has_avx {
                    ao_avx(scene, nsubsamples, img);
                } else if has_sse42 {
                    ao_sse42(scene, nsubsamples, img);
                } else {
                    ao_impl(scene, nsubsamples, img);
                }
            }
        }
    } else {
        /// Renders with the baseline kernel; no run-time dispatch is done on
        /// this architecture.
        pub fn ao<S: Scene>(
            scene: &mut S,
            nsubsamples: usize,
            img: &mut crate::Image,
        ) {
            ao_impl(scene, nsubsamples, img);
        }
    }
}


================================================
FILE: examples/aobench/src/vector_parallel.rs
================================================
//! SIMD parallel aobench

use crate::ambient_occlusion;
use crate::geometry::{Ray, V3D};
use crate::intersection::{Intersect, Isect};
use crate::scene::Scene;
use rayon::prelude::*;

/// Renders the ambient-occlusion image into `img`, one scanline per rayon
/// task.
///
/// The scene argument is ignored: every scanline builds its own
/// `S::default()`, so no scene state is shared between parallel tasks.
/// For each pixel `nsubsamples * nsubsamples` primary rays are cast; a ray
/// that hits contributes `ambient_occlusion::vector(..)` scaled by
/// `1 / nsubsamples^2`, and that value is added to all three channels.
pub fn ao<S: Scene>(_: &mut S, nsubsamples: usize, img: &mut crate::Image) {
    let (w, h) = img.size();
    let inv_ns = 1. / (nsubsamples as f32);
    // Floating-point copies of the image extents used by the projection.
    let (wf, hf) = (w as f32, h as f32);

    img.fdata
        .par_chunks_mut(3 * w)
        .enumerate()
        .for_each(|(row, image)| {
            assert!(image.len() == 3 * w);
            let mut scene = S::default();
            let yf = row as f32;

            // Each 3-element chunk is one pixel of this scanline.
            for (col, pixel) in image.chunks_mut(3).enumerate() {
                let xf = col as f32;
                for u in 0..nsubsamples {
                    for v in 0..nsubsamples {
                        let du = (u as f32) * inv_ns;
                        let dv = (v as f32) * inv_ns;

                        // Primary ray through the sub-pixel sample, with the
                        // x coordinate scaled by the aspect ratio.
                        let dir = V3D {
                            x: (xf + du - (wf / 2.)) / (wf / 2.) * wf / hf,
                            y: -(yf + dv - (hf / 2.)) / (hf / 2.),
                            z: -1.,
                        }
                        .normalized();
                        let ray = Ray { origin: V3D::default(), dir };

                        // Intersect against every sphere, then the plane.
                        let mut isect = Isect::default();
                        for sphere in scene.spheres() {
                            isect = ray.intersect(sphere, isect);
                        }
                        isect = ray.intersect(scene.plane(), isect);

                        let occlusion = if isect.hit {
                            ambient_occlusion::vector(&mut scene, &isect)
                        } else {
                            0.
                        };
                        let sample = occlusion * inv_ns * inv_ns;

                        // Accumulate the (already normalized) sample into
                        // all three channels of the pixel.
                        for channel in pixel.iter_mut() {
                            *channel += sample;
                        }
                    }
                }
            }
        });
}


================================================
FILE: examples/aobench/volta/.gitignore
================================================
ao
*.ppm
objs/


================================================
FILE: examples/aobench/volta/ao.ispc
================================================
// -*- mode: c++ -*-
/*
  Copyright (c) 2010-2011, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are
  met:

    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.


   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
   IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
   TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
   PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.  
*/
/*
  Based on Syoyo Fujita's aobench: http://code.google.com/p/aobench
*/

#define NAO_SAMPLES		8
#define M_PI 3.1415926535f

typedef float<3> vec;

// Result of intersecting a ray with the scene; updated in place by the
// ray_*_intersect routines whenever a closer hit is found.
struct Isect {
    float      t;      // ray parameter of the closest hit found so far
    vec        p;      // hit position
    vec        n;      // surface normal at the hit position
    int        hit;    // set to 1 once any intersection has been recorded
};

// Sphere primitive described by its center and radius.
struct Sphere {
    vec        center;
    float      radius;
};

// Plane primitive described by a point on the plane and its normal.
struct Plane {
    vec    p;   // a point lying on the plane
    vec    n;   // plane normal
};

// Ray described by an origin and a direction.
struct Ray {
    vec org;   // ray origin
    vec dir;   // ray direction
};

// Returns the dot product of the 3-component vectors a and b.
static inline float dot(vec a, vec b) {
    return a.x * b.x + a.y * b.y + a.z * b.z;
}

// Returns the cross product v0 x v1.
static inline vec vcross(vec v0, vec v1) {
    vec ret;
    ret.x = v0.y * v1.z - v0.z * v1.y;
    ret.y = v0.z * v1.x - v0.x * v1.z;
    ret.z = v0.x * v1.y - v0.y * v1.x;
    return ret;
}

// Normalizes v in place by multiplying it with the reciprocal square root
// of its squared length. There is no guard for a zero-length input.
static inline void vnormalize(vec &v) {
    float len2 = dot(v, v);
    float invlen = rsqrt(len2);
    v *= invlen;
}


// Intersects `ray` with `plane` and, if the hit lies in front of the ray
// origin and is closer than the one already stored, records it in `isect`
// (t, hit flag, position and the plane normal). `isect` is left untouched
// otherwise.
static void
ray_plane_intersect(Isect &isect, Ray &ray, uniform Plane &plane) {
    // Plane equation is dot(x, n) + d = 0 with d derived from the plane point.
    float d = -dot(plane.p, plane.n);
    float v = dot(ray.dir, plane.n);

    // Ray direction (almost) perpendicular to the normal: treat as no hit.
    cif (abs(v) < 1.0e-17) 
        return;
    else {
        float t = -(dot(ray.org, plane.n) + d) / v;

        // Accept only hits in front of the origin that beat the current one.
        cif ((t > 0.0) && (t < isect.t)) {
            isect.t = t;
            isect.hit = 1;
            isect.p = ray.org + ray.dir * t;
            isect.n = plane.n;
        }
    }
}


// Intersects `ray` with `sphere` and, if the nearer root lies in front of
// the ray origin and is closer than the hit already stored, records it in
// `isect` (t, hit flag, position and the normalized surface normal).
// NOTE(review): the discriminant B*B - C has no dot(dir, dir) term, so this
// presumably relies on ray.dir being unit length - confirm against callers.
static inline void
ray_sphere_intersect(Isect &isect, Ray &ray, uniform Sphere &sphere) {
    vec rs = ray.org - sphere.center;

    float B = dot(rs, ray.dir);
    float C = dot(rs, rs) - sphere.radius * sphere.radius;
    float D = B * B - C;

    // A positive discriminant means the ray's line crosses the sphere.
    cif (D > 0.) {
        // Only the smaller root (-B - sqrt(D)) is considered.
        float t = -B - sqrt(D);

        cif ((t > 0.0) && (t < isect.t)) {
            isect.t = t;
            isect.hit = 1;
            isect.p = ray.org + t * ray.dir;
            isect.n = isect.p - sphere.center;
            vnormalize(isect.n);
        }
    }
}


// Builds a basis around `n`: basis[2] is n itself, basis[0] and basis[1]
// are normalized cross products derived from it.
static void
orthoBasis(vec basis[3], vec n) {
    basis[2] = n;
    basis[1].x = 0.0; basis[1].y = 0.0; basis[1].z = 0.0;

    // Seed basis[1] with the first coordinate axis (x, then y, then z) whose
    // component of n lies strictly inside (-0.6, 0.6); fall back to x.
    if ((n.x < 0.6) && (n.x > -0.6)) {
        basis[1].x = 1.0;
    } else if ((n.y < 0.6) && (n.y > -0.6)) {
        basis[1].y = 1.0;
    } else if ((n.z < 0.6) && (n.z > -0.6)) {
        basis[1].z = 1.0;
    } else {
        basis[1].x = 1.0;
    }

    basis[0] = vcross(basis[1], basis[2]);
    vnormalize(basis[0]);

    // Recompute basis[1] from the other two axes, replacing the seed axis.
    basis[1] = vcross(basis[2], basis[0]);
    vnormalize(basis[1]);
}


// Estimates ambient occlusion at the hit described by `isect`.
// Casts NAO_SAMPLES * NAO_SAMPLES random rays from a point slightly offset
// along the surface normal, tests each against the three spheres and the
// plane, and returns the fraction of rays that did NOT hit anything
// (1.0 = fully unoccluded, 0.0 = fully occluded).
// `rngstate` is advanced by two frandom() draws per ray.
static float
ambient_occlusion(Isect &isect, uniform Plane &plane, uniform Sphere spheres[3],
                  RNGState &rngstate) {
    float eps = 0.0001f;
    vec p, n;   // note: `n` is declared but never used below
    vec basis[3];
    float occlusion = 0.0;

    // Offset the ray origin along the normal by eps.
    p = isect.p + eps * isect.n;

    orthoBasis(basis, isect.n);

    static const uniform int ntheta = NAO_SAMPLES;
    static const uniform int nphi   = NAO_SAMPLES;
    for (uniform int j = 0; j < ntheta; j++) {
        for (uniform int i = 0; i < nphi; i++) {
            Ray ray;
            Isect occIsect;

            // Random direction in the local frame; z is the non-negative
            // component along basis[2], i.e. along the surface normal.
            float theta = sqrt(frandom(&rngstate));
            float phi   = 2.0f * M_PI * frandom(&rngstate);
            float x = cos(phi) * theta;
            float y = sin(phi) * theta;
            float z = sqrt(1.0 - theta * theta);

            // local . global
            float rx = x * basis[0].x + y * basis[1].x + z * basis[2].x;
            float ry = x * basis[0].y + y * basis[1].y + z * basis[2].y;
            float rz = x * basis[0].z + y * basis[1].z + z * basis[2].z;

            ray.org = p;
            ray.dir.x = rx;
            ray.dir.y = ry;
            ray.dir.z = rz;

            // Start with "no hit" at a very large distance.
            occIsect.t   = 1.0e+17;
            occIsect.hit = 0;

            for (uniform int snum = 0; snum < 3; ++snum)
                ray_sphere_intersect(occIsect, ray, spheres[snum]); 
            ray_plane_intersect (occIsect, ray, plane); 

            if (occIsect.hit) occlusion += 1.0;
        }
    }

    // Convert the hit count into the unoccluded fraction.
    occlusion = (ntheta * nphi - occlusion) / (float)(ntheta * nphi);
    return occlusion;
}


/* Compute the image for the scanlines from [y0,y1), for an overall image
   of width w and height h.

   For every pixel, nsubsamples * nsubsamples primary rays are traced
   against a fixed scene (one plane, three spheres). Each ray that hits
   adds its ambient-occlusion value, scaled by 1 / nsubsamples^2, to the
   three consecutive floats of that pixel in `image`. Rays that miss add
   nothing, so `image` is only ever accumulated into, never cleared here.
 */
static void ao_scanlines(uniform int y0, uniform int y1, uniform int w, 
                         uniform int h,  uniform int nsubsamples, 
                         uniform float image[]) {
    // Hard-coded scene geometry.
    static uniform Plane plane = { { 0.0f, -0.5f, 0.0f }, { 0.f, 1.f, 0.f } };
    static uniform Sphere spheres[3] = {
        { { -2.0f, 0.0f, -3.5f }, 0.5f },
        { { -0.5f, 0.0f, -3.0f }, 0.5f },
        { { 1.0f, 0.0f, -2.2f }, 0.5f } };
    RNGState rngstate;

    // Seed depends on both the program instance and the first scanline.
    seed_rng(&rngstate, programIndex + (y0 << (programIndex & 15)));
    float invSamples = 1.f / nsubsamples;

    foreach_tiled(y = y0 ... y1, x = 0 ... w, 
                  u = 0 ... nsubsamples, v = 0 ... nsubsamples) {
        // Sub-pixel offsets of this sample.
        float du = (float)u * invSamples, dv = (float)v * invSamples;

        // Figure out x,y pixel in NDC
        float px =  (x + du - (w / 2.0f)) / (w / 2.0f);
        float py = -(y + dv - (h / 2.0f)) / (h / 2.0f);

        // Scale NDC based on width/height ratio, supporting non-square image output
        px *= (float)w / (float)h;

        float ret = 0.f;
        Ray ray;
        Isect isect;

        ray.org = 0.f;

        // Poor man's perspective projection
        ray.dir.x = px;
        ray.dir.y = py;
        ray.dir.z = -1.0;
        vnormalize(ray.dir);

        // Start with "no hit" at a very large distance.
        isect.t   = 1.0e+17;
        isect.hit = 0;

        for (uniform int snum = 0; snum < 3; ++snum)
            ray_sphere_intersect(isect, ray, spheres[snum]);
        ray_plane_intersect(isect, ray, plane);

        // Note use of 'coherent' if statement; the set of rays we
        // trace will often all hit or all miss the scene
        cif (isect.hit) {
            ret = ambient_occlusion(isect, plane, spheres, rngstate);
            ret *= invSamples * invSamples;

            // Several sub-samples accumulate into the same pixel, hence the
            // atomic adds on its three channels.
            int offset = 3 * (y * w + x);
            atomic_add_local(&image[offset], ret);
            atomic_add_local(&image[offset+1], ret);
            atomic_add_local(&image[offset+2], ret);
        }
    }
}


// Exported entry point without tasks: renders all scanlines [0, h) of a
// w x h image in a single ao_scanlines call.
export void ao_ispc(uniform int w, uniform int h, uniform int nsubsamples, 
                    uniform float image[]) {
    ao_scanlines(0, h, w, h, nsubsamples, image);
}


// Task body: renders exactly one scanline, the one whose index equals
// this task's taskIndex.
static void task ao_task(uniform int width, uniform int height, 
                         uniform int nsubsamples, uniform float image[]) {
    ao_scanlines(taskIndex, taskIndex+1, width, height, nsubsamples, image);
}


// Exported task-parallel entry point: launches h ao_task instances, one per
// scanline of the w x h image.
export void ao_ispc_tasks(uniform int w, uniform int h, uniform int nsubsamples, 
                          uniform float image[]) {
    launch[h] ao_task(w, h, nsubsamples, image);
}


================================================
FILE: examples/dot_product/Cargo.toml
================================================
[package]
name = "dot_product"
version = "0.1.0"
authors = ["Gonzalo Brito Gadeschi <gonzalobg88@gmail.com>"]
edition = "2018"

[dependencies]
packed_simd = { package = "packed_simd", path = "../.." }

[lib]
name = "dot_product_lib"
path = "src/lib.rs"


================================================
FILE: examples/dot_product/readme.md
================================================
# Vector dot product


================================================
FILE: examples/dot_product/src/lib.rs
================================================
//! Vector dot product
#![deny(rust_2018_idioms)]
#![feature(custom_inner_attributes)]
#![allow(clippy::must_use_candidate, clippy::float_cmp)]

pub mod scalar;
pub mod simd;

#[cfg(test)]
#[rustfmt::skip]
/// Shared test driver: checks that the dot-product implementation `f`
/// returns the exact expected value for a fixed table of inputs.
fn test<F: Fn(&[f32], &[f32]) -> f32>(f: F) {
    // Each entry is (lhs, rhs, expected dot product).
    let cases: &[(&[f32], &[f32], f32)] = &[
        (&[0_f32, 0., 0., 0.], &[0_f32, 0., 0., 0.], 0_f32),
        (&[0_f32, 0., 0., 1.], &[0_f32, 0., 0., 1.], 1_f32),
        (&[1_f32, 2., 3., 4.], &[0_f32, 0., 0., 0.], 0_f32),
        (&[1_f32, 2., 3., 4.], &[1_f32, 2., 3., 4.], 30_f32),
        (&[1_f32, 2., 3., 4., 1., 2., 3., 4.], &[1_f32, 1., 1., 1., 1., 1., 1., 1.], 20_f32),
    ];

    cases
        .iter()
        .for_each(|&(lhs, rhs, expected)| assert_eq!(f(lhs, rhs), expected));
}


================================================
FILE: examples/dot_product/src/scalar.rs
================================================
//! Scalar implementation

/// Computes the dot product of `a` and `b` one element pair at a time.
///
/// # Panics
///
/// Panics if the two slices differ in length.
pub fn dot_prod(a: &[f32], b: &[f32]) -> f32 {
    assert_eq!(a.len(), b.len());
    let products = a.iter().zip(b).map(|(x, y)| x * y);
    products.sum()
}

// Runs the crate-level test table against the scalar implementation.
#[cfg(test)]
#[test]
fn test() {
    crate::test(dot_prod)
}


================================================
FILE: examples/dot_product/src/simd.rs
================================================
//! SIMD implementation

use packed_simd::f32x4;

/// Computes the dot product of `a` and `b` four lanes at a time.
///
/// Both slices are walked in chunks of four `f32`s; each pair of chunks is
/// loaded into `f32x4` vectors and multiplied, the products are summed
/// lane-wise, and the four lanes of that sum are added together at the end.
///
/// # Panics
///
/// Panics if the slices differ in length or if the length is not a
/// multiple of four.
pub fn dot_prod(a: &[f32], b: &[f32]) -> f32 {
    assert_eq!(a.len(), b.len());
    assert!(a.len() % 4 == 0);

    let lane_sums = a
        .chunks_exact(4)
        .zip(b.chunks_exact(4))
        .map(|(lhs, rhs)| {
            f32x4::from_slice_unaligned(lhs) * f32x4::from_slice_unaligned(rhs)
        })
        .sum::<f32x4>();

    lane_sums.sum()
}

// Runs the crate-level test table against the SIMD implementation.
#[cfg(test)]
#[test]
fn test() {
    crate::test(dot_prod)
}


================================================
FILE: examples/fannkuch_redux/Cargo.toml
================================================
[package]
name = "fannkuch_redux"
version = "0.1.0"
authors = ["gnzlbg <gonzalobg88@gmail.com>"]
edition = "2018"

[dependencies]
packed_simd = { package = "packed_simd", path = "../.." }

[[bin]]
name = "fannkuch_redux"
path = "src/main.rs"

[lib]
name = "fannkuch_redux_lib"
path = "src/lib.rs"


================================================
FILE: examples/fannkuch_redux/readme.md
================================================
# Fannkuch redux

This is the [`fannkuch redux` benchmark from the benchmarksgame][bg]. 

## Background and description

The fannkuch benchmark is defined by programs in [Performing Lisp Analysis of
the FANNKUCH
Benchmark](http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.35.5124),
Kenneth R. Anderson and Duane Rettig. FANNKUCH is an abbreviation for the German
word _Pfannkuchen_, or pancakes, in analogy to flipping pancakes. The conjecture
is that the maximum count is approximated by `n*log(n)` when `n` goes to infinity.

Each program should:

* Take a permutation of `{1,...,n}`, for example: `{4,2,1,5,3}`.

* Take the first element, here `4`, and reverse the order of the first `4`
  elements: `{5,1,2,4,3}`.

* Repeat this until the first element is a `1`, so flipping won't change
  anything more: `{3,4,2,1,5}`, `{2,4,3,1,5}`, `{4,2,3,1,5}`, `{1,3,2,4,5}`.

* Count the number of flips, here `5`.

* Keep a checksum

  * `checksum = checksum + (if permutation_index is even then flips_count else
    -flips_count)`

  * `checksum = checksum + (toggle_sign_-1_1 * flips_count)`

* Do this for all `n!` permutations, and record the maximum number of flips
  needed for any permutation.

## Usage

It takes two arguments in this order:

* `n`: the input sequence length: `{1, ..., n}`
* (optional) `algorithm`: the algorithm to use - defaults to the fastest one.
  * `0`: SIMD algorithm
  * `1`: scalar algorithm

[bg]: https://benchmarksgame-team.pages.debian.net/benchmarksgame/description/fannkuchredux.html#fannkuchredux


================================================
FILE: examples/fannkuch_redux/src/fannkuchredux-output.txt
================================================
228
Pfannkuchen(7) = 16


================================================
FILE: examples/fannkuch_redux/src/lib.rs
================================================
//! Fannkuch redux
#![deny(warnings, rust_2018_idioms)]
#![allow(non_snake_case, non_camel_case_types)]
#![allow(
    clippy::similar_names,
    clippy::many_single_char_names,
    clippy::cast_possible_truncation,
    clippy::cast_sign_loss,
    clippy::cast_possible_wrap,
    clippy::must_use_candidate,
    clippy::float_cmp
)]

pub mod scalar;
pub mod simd;

/// Runs the fannkuch-redux benchmark for permutations of length `n` with
/// the implementation selected by `alg`.
///
/// `alg == 0` selects the SIMD implementation and `alg == 1` the scalar
/// one. The result pair is forwarded unchanged from that implementation.
///
/// # Panics
///
/// Panics for any other value of `alg`.
pub fn fannkuch_redux(n: usize, alg: usize) -> (i32, i32) {
    if alg == 0 {
        return simd::fannkuch_redux(n);
    }
    if alg == 1 {
        return scalar::fannkuch_redux(n);
    }
    panic!("unknown algorithm value: {}", alg)
}


================================================
FILE: examples/fannkuch_redux/src/main.rs
================================================
#![deny(rust_2018_idioms)]

use fannkuch_redux_lib::*;

/// Runs the benchmark for input size `n` with algorithm `alg` and writes
/// the two result lines (checksum, then `Pfannkuchen(n) = maxflips`) to `o`.
///
/// Panics if writing to `o` fails.
fn run<O: std::io::Write>(o: &mut O, n: usize, alg: usize) {
    let result = fannkuch_redux(n, alg);
    writeln!(o, "{}\nPfannkuchen({}) = {}", result.0, n, result.1).unwrap();
}

/// Command-line entry point.
///
/// The first argument is the sequence length `n`, which must lie in
/// `[3, 14]`. The optional second argument is the algorithm id and
/// defaults to `0`. The result is printed to standard output.
fn main() {
    // Skip the program name; what remains are the user-supplied arguments.
    let mut args = std::env::args().skip(1);

    let n: usize = args.next().expect("need one arg").parse().unwrap();
    assert!((3..=14).contains(&n), "n = {} is out-of-range [3, 14]", n);

    let alg: usize = match args.next() {
        Some(v) => v.parse().unwrap(),
        None => 0,
    };

    run(&mut std::io::stdout(), n, alg);
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Reference output for `n = 7`, shared by both algorithm tests.
    static OUTPUT: &[u8] = include_bytes!("fannkuchredux-output.txt");

    /// Runs algorithm `alg` for `n = 7` and checks the produced bytes
    /// against `OUTPUT`. On a mismatch the first differing byte is reported
    /// with both values printed in binary.
    fn verify_output(alg: usize) {
        let mut out: Vec<u8> = Vec::new();

        run(&mut out, 7, alg);

        assert_eq!(out.len(), OUTPUT.len());
        if out != OUTPUT {
            for (i, (got, want)) in out.iter().zip(OUTPUT.iter()).enumerate() {
                assert_eq!(
                    got, want,
                    "byte {} differs - is: {:#08b} - should: {:#08b}",
                    i, got, want
                );
            }
        }
    }

    /// Algorithm `0` (SIMD) must reproduce the reference output.
    #[test]
    fn verify_output_simd() {
        verify_output(0);
    }

    /// Algorithm `1` (scalar) must reproduce the reference output.
    #[test]
    fn verify_output_scalar() {
        verify_output(1);
    }
}


================================================
FILE: examples/fannkuch_redux/src/scalar.rs
================================================
//! Scalar fannkuch redux implementation

use std::{cmp, mem, thread};

// Rotates `x` left by one position: the first element moves to the end and
// every other element moves down by one index. Panics on an empty slice.
// FIXME: replace with slice rotate
fn rotate(x: &mut [i32]) {
    // Walk from the back, each slot handing its old value to the slot
    // before it; the original first element ends up in the last slot.
    let mut carry = x[0];
    x.iter_mut().rev().for_each(|slot| mem::swap(slot, &mut carry));
}

/// Advances `perm` to the next permutation in the enumeration order driven
/// by the per-position counters in `count`.
///
/// For `i = 1, 2, ..` the prefix `perm[..=i]` is rotated left by one. If
/// `count[i]` is still below `i` it is incremented and the step is done;
/// otherwise it is reset to zero and the next, longer prefix is rotated.
fn next_permutation(perm: &mut [i32], count: &mut [i32]) {
    for i in 1..perm.len() {
        rotate(&mut perm[..=i]);
        if count[i] < i as i32 {
            count[i] += 1;
            return;
        }
        // Counter for this position wrapped: carry into the next one.
        count[i] = 0;
    }
}

/// Copyable fixed-size buffer holding one permutation of 1-based values.
#[derive(Clone, Copy)]
struct P {
    // Permutation entries; `Perm::get` fills all 16 slots with `1..=16`
    // before rearranging the leading ones.
    p: [i32; 16],
}

/// Enumeration state for walking through permutations by index.
#[derive(Clone, Copy)]
struct Perm {
    // Per-position counters: `get` stores the digits `idx / i!` here and
    // `next_permutation` advances them.
    cnt: [i32; 16],
    // Factorial table: `fact[i] == i!` for `i <= n`, other entries stay 1.
    fact: [u32; 16],
    // Number of elements being permuted.
    n: u32,
    // Index of the current permutation.
    permcount: u32,
    // The current permutation.
    perm: P,
}

impl Perm {
    /// Creates the enumeration state for permutations of `n` elements.
    ///
    /// Fills `fact[i]` with `i!` for `i` in `0..=n`. Counters, permutation
    /// index and the permutation itself start zeroed; `get` initializes them.
    ///
    /// NOTE(review): the factorials are stored as `u32` and 13! exceeds
    /// `u32::MAX`, so `n >= 13` overflows in the loop below (a panic when
    /// overflow checks are enabled, a wrapped value otherwise).
    fn new(n: u32) -> Self {
        let mut fact = [1; 16];
        for i in 1..=n as usize {
            fact[i] = fact[i - 1] * i as u32;
        }
        Self { cnt: [0; 16], fact, n, permcount: 0, perm: P { p: [0; 16] } }
    }

    /// Positions the state at permutation index `idx` and returns that
    /// permutation.
    ///
    /// Starting from the identity `1, 2, .., 16`, for each `i` from `n - 1`
    /// down to `1`: the digit `d = idx / i!` is stored in `cnt[i]`, `idx` is
    /// reduced modulo `i!`, and the first `i + 1` entries are rotated left
    /// by `d` positions.
    fn get(&mut self, mut idx: i32) -> P {
        // Scratch copy of the prefix being rotated.
        let mut pp = [0_u8; 16];
        self.permcount = idx as u32;
        // Reset to the identity permutation (1-based values).
        for (i, place) in self.perm.p.iter_mut().enumerate() {
            *place = i as i32 + 1;
        }

        for i in (1..self.n as usize).rev() {
            let d = idx / self.fact[i] as i32;
            self.cnt[i] = d;
            idx %= self.fact[i] as i32;
            // Snapshot the first i + 1 entries before overwriting them.
            for (place, val) in pp.iter_mut().zip(self.perm.p[..=i].iter()) {
                *place = (*val) as u8
            }

            let d = d as usize;
            // Entry j takes the snapshot value at (j + d) modulo (i + 1),
            // i.e. the prefix is rotated left by d.
            for j in 0..=i {
                self.perm.p[j] = i32::from(if j + d <= i {
                    pp[j + d]
                } else {
                    pp[j + d - i - 1]
                });
            }
        }

        self.perm
    }

    /// Returns the index of the current permutation.
    fn count(&self) -> u32 {
        self.permcount
    }
    /// Returns `n!`, the total number of permutations.
    fn max(&self) -> u32 {
        self.fact[self.n as usize]
    }

    /// Steps to the following permutation, increments the permutation index
    /// and returns the new permutation.
    fn next(&mut self) -> P {
        next_permutation(&mut self.perm.p, &mut self.cnt);
        self.permcount += 1;

        self.perm
    }
}

/// Reverses the first `k` elements of `tperm` in place; the rest of the
/// slice is left untouched. Panics if `k` exceeds the slice length.
fn reverse(tperm: &mut [i32], k: usize) {
    let prefix = &mut tperm[..k];
    prefix.reverse();
}

/// Processes the permutations with indices in `[n, max)`.
///
/// For each permutation the number of prefix flips needed to bring `1` to
/// the front is counted. Returns `(checksum, maxflips)`: the checksum adds
/// the flip count for even permutation indices and subtracts it for odd
/// ones, and `maxflips` is the largest flip count seen.
fn work(mut perm: Perm, n: usize, max: usize) -> (i32, i32) {
    let mut checksum: i32 = 0;
    let mut maxflips: i32 = 0;

    let mut current = perm.get(n as i32);

    while perm.count() < max as u32 {
        // Keep reversing the prefix whose length is the leading value until
        // that leading value is 1.
        let mut flips: i32 = 0;
        loop {
            let first = current.p[0];
            if first == 1 {
                break;
            }
            reverse(&mut current.p, first as usize);
            flips += 1;
        }

        if perm.count() % 2 == 0 {
            checksum += flips;
        } else {
            checksum -= flips;
        }
        maxflips = cmp::max(maxflips, flips);

        current = perm.next();
    }

    (checksum, maxflips)
}

/// Scalar fannkuch-redux over all permutations of `n` elements.
///
/// The permutation index range is split into equally sized chunks (currently
/// a single chunk), each chunk is processed by `work` on its own thread,
/// and the per-chunk results are combined. Returns `(checksum, maxflips)`.
pub fn fannkuch_redux(n: usize) -> (i32, i32) {
    let perm = Perm::new(n as u32);
    let total = perm.max();

    // Number of chunks, and therefore of worker threads.
    let chunks = 1;
    let chunk_len = total / chunks;

    // Spawn every worker before joining any of them.
    let handles: Vec<_> = (0..chunks)
        .map(|c| {
            let start = c * chunk_len;
            let end = cmp::min(start + chunk_len, total);
            thread::spawn(move || work(perm, start as usize, end as usize))
        })
        .collect();

    handles.into_iter().fold((0, 0), |(checksum, maxflips), handle| {
        let (cs, mf) = handle.join().unwrap();
        (checksum + cs, cmp::max(maxflips, mf))
    })
}

// Checks the scalar implementation against the known result for n = 7
// (checksum 228, maximum flips 16).
#[cfg(test)]
#[test]
fn test() {
    assert_eq!(fannkuch_redux(7), (228, 16));
}


================================================
FILE: examples/fannkuch_redux/src/simd.rs
================================================
//! Vectorized fannkuch redux implementation

use packed_simd::*;

/// Working state of the vectorized fannkuch-redux implementation.
struct State {
    // Scalar copy of the current 16-byte sequence; loaded into a vector by
    // `load_s` and written back by `rotate`.
    s: [u8; 16],
    // `flip_masks[i]`: byte indices 0..16 with the first `i + 1` reversed
    // (filled in by `popmasks`).
    flip_masks: [u8x16; 16],
    // `rotate_masks[i]`: byte indices 0..16 with the first `i + 1` rotated
    // left by one (filled in by `popmasks`).
    rotate_masks: [u8x16; 16],

    // NOTE(review): the three fields below are presumably the running
    // maximum flip count, the permutation parity and the checksum - their
    // use is outside this view; confirm against `tk`.
    maxflips: i32,
    odd: u16,
    checksum: i32,
}

impl Default for State {
    /// Returns a state with every field zeroed; the shuffle masks are only
    /// valid once `popmasks` has been called.
    fn default() -> Self {
        let zeroed = u8x16::splat(0);
        Self {
            maxflips: 0,
            odd: 0,
            checksum: 0,

            s: [0; 16],
            flip_masks: [zeroed; 16],
            rotate_masks: [zeroed; 16],
        }
    }
}

impl State {
    /// Scalar rotation: moves `s[0]` to position `n` and shifts
    /// `s[1..=n]` down by one, i.e. rotates `s[..=n]` left by one.
    /// Panics if `n` is not a valid index into `s`.
    fn rotate_sisd(&mut self, n: usize) {
        self.s[..=n].rotate_left(1);
    }
    /// Populates `flip_masks` and `rotate_masks`.
    ///
    /// For every `i` in `0..16`, `flip_masks[i]` is the identity index
    /// vector with its first `i + 1` entries reversed, and `rotate_masks[i]`
    /// is the identity index vector with its first `i + 1` entries rotated
    /// left by one. `self.s` is used as scratch space and overwritten.
    fn popmasks(&mut self) {
        for i in 0..16 {
            // Identity indices 0, 1, .., 15 with the prefix [0, i] reversed.
            let mut mask = [0_u8; 16];
            for (j, m) in mask.iter_mut().enumerate() {
                *m = j as u8;
            }
            mask[..=i].reverse();
            self.flip_masks[i] = u8x16::from_slice_unaligned(&mask);

            // Identity indices in `s`, rotated by the scalar routine.
            for (j, s) in self.s.iter_mut().enumerate() {
                *s = j as u8;
            }
            self.rotate_sisd(i);
            self.rotate_masks[i] = self.load_s();
        }
    }
    /// Vector rotation: shuffles the bytes of `s` with `rotate_masks[n]`
    /// and stores the result back into `s`.
    fn rotate(&mut self, n: usize) {
        let current = self.load_s();
        let rotated = current.shuffle1_dyn(self.rotate_masks[n]);
        rotated.write_to_slice_unaligned(&mut self.s)
    }

    /// Loads the 16 bytes of `s` into a `u8x16` vector.
    fn load_s(&self) -> u8x16 {
        u8x16::from_slice_unaligned(&self.s)
    }

    fn tk(&mut self, n: usize) {
        #[derive(Copy, Clone, Debug)]
        struct Perm {
            perm: u8x16,
            start: u8,
            odd: u16,
        }

        let mut perms = [Perm { perm: u8x16::splat(0), start: 0, odd: 0 }; 60];

        let mut i = 0;
        let mut c = [0_u8; 16];
        let mut perm_max = 0;
        // Cache this locall

Download .txt

gitextract_ltzo2pap/

├── .appveyor.yml
├── .github/
│   └── workflows/
│       ├── benchmarks.yml
│       ├── ci.yml
│       ├── docs.yml
│       └── run-ci-script.yml
├── .gitignore
├── .travis.yml
├── Cargo.toml
├── LICENSE-APACHE
├── LICENSE-MIT
├── README.md
├── bors.toml
├── build.rs
├── ci/
│   ├── all.sh
│   ├── android-install-ndk.sh
│   ├── android-install-sdk.sh
│   ├── android-sysimage.sh
│   ├── benchmark.sh
│   ├── deploy_and_run_on_ios_simulator.rs
│   ├── docker/
│   │   ├── aarch64-linux-android/
│   │   │   └── Dockerfile
│   │   ├── aarch64-unknown-linux-gnu/
│   │   │   └── Dockerfile
│   │   ├── arm-unknown-linux-gnueabi/
│   │   │   └── Dockerfile
│   │   ├── arm-unknown-linux-gnueabihf/
│   │   │   └── Dockerfile
│   │   ├── armv7-linux-androideabi/
│   │   │   └── Dockerfile
│   │   ├── armv7-unknown-linux-gnueabihf/
│   │   │   └── Dockerfile
│   │   ├── i586-unknown-linux-gnu/
│   │   │   └── Dockerfile
│   │   ├── i686-unknown-linux-gnu/
│   │   │   └── Dockerfile
│   │   ├── mips-unknown-linux-gnu/
│   │   │   └── Dockerfile
│   │   ├── mips64-unknown-linux-gnuabi64/
│   │   │   └── Dockerfile
│   │   ├── mips64el-unknown-linux-gnuabi64/
│   │   │   └── Dockerfile
│   │   ├── mipsel-unknown-linux-musl/
│   │   │   └── Dockerfile
│   │   ├── powerpc-unknown-linux-gnu/
│   │   │   └── Dockerfile
│   │   ├── powerpc64-unknown-linux-gnu/
│   │   │   └── Dockerfile
│   │   ├── powerpc64le-unknown-linux-gnu/
│   │   │   └── Dockerfile
│   │   ├── s390x-unknown-linux-gnu/
│   │   │   └── Dockerfile
│   │   ├── sparc64-unknown-linux-gnu/
│   │   │   └── Dockerfile
│   │   ├── thumbv7neon-linux-androideabi/
│   │   │   └── Dockerfile
│   │   ├── thumbv7neon-unknown-linux-gnueabihf/
│   │   │   └── Dockerfile
│   │   ├── wasm32-unknown-unknown/
│   │   │   └── Dockerfile
│   │   ├── x86_64-linux-android/
│   │   │   └── Dockerfile
│   │   ├── x86_64-unknown-linux-gnu/
│   │   │   └── Dockerfile
│   │   └── x86_64-unknown-linux-gnu-emulated/
│   │       └── Dockerfile
│   ├── dox.sh
│   ├── linux-s390x.sh
│   ├── linux-sparc64.sh
│   ├── lld-shim.rs
│   ├── max_line_width.sh
│   ├── run-docker.sh
│   ├── run.sh
│   ├── run_examples.sh
│   ├── runtest-android.rs
│   ├── setup_benchmarks.sh
│   └── test-runner-linux
├── contributing.md
├── examples/
│   ├── Cargo.toml
│   ├── aobench/
│   │   ├── Cargo.toml
│   │   ├── benches/
│   │   │   ├── ambient_occlusion.rs
│   │   │   ├── isec_plane.rs
│   │   │   ├── isec_sphere.rs
│   │   │   ├── random.rs
│   │   │   └── scanlines.rs
│   │   ├── benchmark.sh
│   │   ├── build.rs
│   │   ├── readme.md
│   │   ├── rustfmt.toml
│   │   ├── src/
│   │   │   ├── ambient_occlusion.rs
│   │   │   ├── geometry/
│   │   │   │   ├── mod.rs
│   │   │   │   ├── plane.rs
│   │   │   │   ├── ray.rs
│   │   │   │   ├── rayxN.rs
│   │   │   │   ├── sphere.rs
│   │   │   │   ├── vec.rs
│   │   │   │   └── vecxN.rs
│   │   │   ├── image.rs
│   │   │   ├── intersection/
│   │   │   │   ├── mod.rs
│   │   │   │   ├── packet.rs
│   │   │   │   ├── ray_plane.rs
│   │   │   │   ├── ray_sphere.rs
│   │   │   │   └── single.rs
│   │   │   ├── ispc_.rs
│   │   │   ├── lib.rs
│   │   │   ├── main.rs
│   │   │   ├── random.rs
│   │   │   ├── scalar.rs
│   │   │   ├── scalar_parallel.rs
│   │   │   ├── scene/
│   │   │   │   ├── mod.rs
│   │   │   │   ├── random.rs
│   │   │   │   └── test.rs
│   │   │   ├── tiled.rs
│   │   │   ├── tiled_parallel.rs
│   │   │   ├── vector.rs
│   │   │   └── vector_parallel.rs
│   │   └── volta/
│   │       ├── .gitignore
│   │       └── ao.ispc
│   ├── dot_product/
│   │   ├── Cargo.toml
│   │   ├── readme.md
│   │   └── src/
│   │       ├── lib.rs
│   │       ├── scalar.rs
│   │       └── simd.rs
│   ├── fannkuch_redux/
│   │   ├── Cargo.toml
│   │   ├── readme.md
│   │   └── src/
│   │       ├── fannkuchredux-output.txt
│   │       ├── lib.rs
│   │       ├── main.rs
│   │       ├── scalar.rs
│   │       └── simd.rs
│   ├── mandelbrot/
│   │   ├── Cargo.toml
│   │   ├── benchmark.sh
│   │   ├── build.rs
│   │   ├── readme.md
│   │   ├── src/
│   │   │   ├── ispc_tasks.rs
│   │   │   ├── lib.rs
│   │   │   ├── main.rs
│   │   │   ├── mandelbrot-output.txt
│   │   │   ├── scalar_par.rs
│   │   │   └── simd_par.rs
│   │   └── volta/
│   │       └── mandelbrot.ispc
│   ├── matrix_inverse/
│   │   ├── Cargo.toml
│   │   ├── readme.md
│   │   └── src/
│   │       ├── lib.rs
│   │       ├── scalar.rs
│   │       └── simd.rs
│   ├── nbody/
│   │   ├── Cargo.toml
│   │   ├── benches/
│   │   │   └── algs.rs
│   │   ├── readme.md
│   │   └── src/
│   │       ├── lib.rs
│   │       ├── main.rs
│   │       ├── nbody-output.txt
│   │       ├── scalar.rs
│   │       └── simd.rs
│   ├── options_pricing/
│   │   ├── Cargo.toml
│   │   ├── benchmark.sh
│   │   ├── build.rs
│   │   ├── readme.md
│   │   ├── src/
│   │   │   ├── ispc_.rs
│   │   │   ├── lib.rs
│   │   │   ├── main.rs
│   │   │   ├── scalar.rs
│   │   │   ├── simd.rs
│   │   │   ├── simd_kernels.rs
│   │   │   ├── simd_par.rs
│   │   │   └── sum.rs
│   │   └── volta/
│   │       ├── options.ispc
│   │       └── options_defs.h
│   ├── rust-toolchain
│   ├── slice_sum/
│   │   ├── Cargo.toml
│   │   ├── readme.md
│   │   └── src/
│   │       └── main.rs
│   ├── spectral_norm/
│   │   ├── Cargo.toml
│   │   ├── readme.md
│   │   └── src/
│   │       ├── lib.rs
│   │       ├── main.rs
│   │       ├── scalar.rs
│   │       ├── simd.rs
│   │       └── spectralnorm-output.txt
│   ├── stencil/
│   │   ├── Cargo.toml
│   │   ├── benchmark.sh
│   │   ├── build.rs
│   │   ├── readme.md
│   │   ├── src/
│   │   │   ├── ispc_loops.rs
│   │   │   ├── lib.rs
│   │   │   ├── main.rs
│   │   │   ├── scalar.rs
│   │   │   ├── simd.rs
│   │   │   └── simd_par.rs
│   │   └── volta/
│   │       ├── .gitignore
│   │       ├── Makefile
│   │       ├── common.mk
│   │       ├── stencil.cpp
│   │       ├── stencil.ispc
│   │       ├── stencil_serial.cpp
│   │       ├── tasksys.cpp
│   │       └── timing.h
│   └── triangle_xform/
│       ├── Cargo.toml
│       ├── readme.md
│       └── src/
│           ├── lib.rs
│           ├── scalar.rs
│           └── simd.rs
├── micro_benchmarks/
│   ├── Cargo.toml
│   ├── benches/
│   │   └── mask_reductions.rs
│   └── rust-toolchain
├── perf-guide/
│   ├── .gitignore
│   ├── book.toml
│   └── src/
│       ├── SUMMARY.md
│       ├── ascii.css
│       ├── bound_checks.md
│       ├── float-math/
│       │   ├── approx.md
│       │   ├── fma.md
│       │   ├── fp.md
│       │   └── svml.md
│       ├── introduction.md
│       ├── prof/
│       │   ├── linux.md
│       │   ├── mca.md
│       │   └── profiling.md
│       ├── target-feature/
│       │   ├── attribute.md
│       │   ├── features.md
│       │   ├── inlining.md
│       │   ├── practice.md
│       │   ├── runtime.md
│       │   └── rustflags.md
│       └── vert-hor-ops.md
├── rust-toolchain
├── rustfmt.toml
├── src/
│   ├── api/
│   │   ├── bit_manip.rs
│   │   ├── bitmask.rs
│   │   ├── cast/
│   │   │   ├── macros.rs
│   │   │   ├── v128.rs
│   │   │   ├── v16.rs
│   │   │   ├── v256.rs
│   │   │   ├── v32.rs
│   │   │   ├── v512.rs
│   │   │   └── v64.rs
│   │   ├── cast.rs
│   │   ├── cmp/
│   │   │   ├── eq.rs
│   │   │   ├── ord.rs
│   │   │   ├── partial_eq.rs
│   │   │   ├── partial_ord.rs
│   │   │   └── vertical.rs
│   │   ├── cmp.rs
│   │   ├── default.rs
│   │   ├── fmt/
│   │   │   ├── binary.rs
│   │   │   ├── debug.rs
│   │   │   ├── lower_hex.rs
│   │   │   ├── octal.rs
│   │   │   └── upper_hex.rs
│   │   ├── fmt.rs
│   │   ├── from/
│   │   │   ├── from_array.rs
│   │   │   └── from_vector.rs
│   │   ├── from.rs
│   │   ├── hash.rs
│   │   ├── into_bits/
│   │   │   ├── arch_specific.rs
│   │   │   ├── macros.rs
│   │   │   ├── v128.rs
│   │   │   ├── v16.rs
│   │   │   ├── v256.rs
│   │   │   ├── v32.rs
│   │   │   ├── v512.rs
│   │   │   └── v64.rs
│   │   ├── into_bits.rs
│   │   ├── math/
│   │   │   ├── float/
│   │   │   │   ├── abs.rs
│   │   │   │   ├── consts.rs
│   │   │   │   ├── cos.rs
│   │   │   │   ├── exp.rs
│   │   │   │   ├── ln.rs
│   │   │   │   ├── mul_add.rs
│   │   │   │   ├── mul_adde.rs
│   │   │   │   ├── powf.rs
│   │   │   │   ├── recpre.rs
│   │   │   │   ├── rsqrte.rs
│   │   │   │   ├── sin.rs
│   │   │   │   ├── sqrt.rs
│   │   │   │   ├── sqrte.rs
│   │   │   │   └── tanh.rs
│   │   │   └── float.rs
│   │   ├── math.rs
│   │   ├── minimal/
│   │   │   ├── iuf.rs
│   │   │   ├── mask.rs
│   │   │   └── ptr.rs
│   │   ├── minimal.rs
│   │   ├── ops/
│   │   │   ├── scalar_arithmetic.rs
│   │   │   ├── scalar_bitwise.rs
│   │   │   ├── scalar_mask_bitwise.rs
│   │   │   ├── scalar_shifts.rs
│   │   │   ├── vector_arithmetic.rs
│   │   │   ├── vector_bitwise.rs
│   │   │   ├── vector_float_min_max.rs
│   │   │   ├── vector_int_min_max.rs
│   │   │   ├── vector_mask_bitwise.rs
│   │   │   ├── vector_neg.rs
│   │   │   ├── vector_rotates.rs
│   │   │   └── vector_shifts.rs
│   │   ├── ops.rs
│   │   ├── ptr/
│   │   │   └── gather_scatter.rs
│   │   ├── ptr.rs
│   │   ├── reductions/
│   │   │   ├── bitwise.rs
│   │   │   ├── float_arithmetic.rs
│   │   │   ├── integer_arithmetic.rs
│   │   │   ├── mask.rs
│   │   │   └── min_max.rs
│   │   ├── reductions.rs
│   │   ├── select.rs
│   │   ├── shuffle.rs
│   │   ├── shuffle1_dyn.rs
│   │   ├── slice/
│   │   │   ├── from_slice.rs
│   │   │   └── write_to_slice.rs
│   │   ├── slice.rs
│   │   └── swap_bytes.rs
│   ├── api.rs
│   ├── codegen/
│   │   ├── bit_manip.rs
│   │   ├── llvm.rs
│   │   ├── math/
│   │   │   ├── float/
│   │   │   │   ├── abs.rs
│   │   │   │   ├── cos.rs
│   │   │   │   ├── cos_pi.rs
│   │   │   │   ├── exp.rs
│   │   │   │   ├── ln.rs
│   │   │   │   ├── macros.rs
│   │   │   │   ├── mul_add.rs
│   │   │   │   ├── mul_adde.rs
│   │   │   │   ├── powf.rs
│   │   │   │   ├── sin.rs
│   │   │   │   ├── sin_cos_pi.rs
│   │   │   │   ├── sin_pi.rs
│   │   │   │   ├── sqrt.rs
│   │   │   │   ├── sqrte.rs
│   │   │   │   └── tanh.rs
│   │   │   └── float.rs
│   │   ├── math.rs
│   │   ├── pointer_sized_int.rs
│   │   ├── reductions/
│   │   │   ├── mask/
│   │   │   │   ├── aarch64.rs
│   │   │   │   ├── arm.rs
│   │   │   │   ├── fallback.rs
│   │   │   │   ├── fallback_impl.rs
│   │   │   │   ├── x86/
│   │   │   │   │   ├── avx.rs
│   │   │   │   │   ├── avx2.rs
│   │   │   │   │   ├── sse.rs
│   │   │   │   │   └── sse2.rs
│   │   │   │   └── x86.rs
│   │   │   └── mask.rs
│   │   ├── reductions.rs
│   │   ├── shuffle.rs
│   │   ├── shuffle1_dyn.rs
│   │   ├── swap_bytes.rs
│   │   ├── v128.rs
│   │   ├── v16.rs
│   │   ├── v256.rs
│   │   ├── v32.rs
│   │   ├── v512.rs
│   │   ├── v64.rs
│   │   ├── vPtr.rs
│   │   └── vSize.rs
│   ├── codegen.rs
│   ├── lib.rs
│   ├── masks.rs
│   ├── sealed.rs
│   ├── testing/
│   │   ├── macros.rs
│   │   └── utils.rs
│   ├── testing.rs
│   ├── v128.rs
│   ├── v16.rs
│   ├── v256.rs
│   ├── v32.rs
│   ├── v512.rs
│   ├── v64.rs
│   ├── vPtr.rs
│   └── vSize.rs
├── tests/
│   └── endianness.rs
└── verify/
    └── verify/
        ├── Cargo.toml
        ├── readme.md
        ├── rust-toolchain
        └── src/
            ├── api/
            │   ├── math/
            │   │   └── float/
            │   │       ├── mod.rs
            │   │       └── mul_add.rs
            │   ├── math.rs
            │   ├── ops/
            │   │   ├── vector_rotates/
            │   │   │   └── x86.rs
            │   │   └── vector_rotates.rs
            │   ├── ops.rs
            │   ├── reductions/
            │   │   ├── mask/
            │   │   │   ├── avx.rs
            │   │   │   ├── avx2.rs
            │   │   │   ├── sse.rs
            │   │   │   └── sse2.rs
            │   │   └── mask.rs
            │   └── reductions.rs
            ├── api.rs
            └── lib.rs

Download .txt

SYMBOL INDEX (747 symbols across 114 files)

FILE: build.rs
  function main (line 1) | fn main() {

FILE: ci/deploy_and_run_on_ios_simulator.rs
  function package_as_simulator_app (line 34) | fn package_as_simulator_app(crate_name: &str, test_binary_path: &Path) {
  function start_simulator (line 59) | fn start_simulator() {
  function install_app_to_simulator (line 105) | fn install_app_to_simulator() {
  function run_app_on_simulator (line 116) | fn run_app_on_simulator() {
  type CheckStatus (line 151) | trait CheckStatus {
    method check_status (line 152) | fn check_status(&mut self);
    method check_status (line 156) | fn check_status(&mut self) {
  function main (line 162) | fn main() {

FILE: ci/lld-shim.rs
  function main (line 5) | fn main() {

FILE: ci/runtest-android.rs
  function main (line 5) | fn main() {

FILE: examples/aobench/benches/ambient_occlusion.rs
  function hit_scalar (line 9) | fn hit_scalar(c: &mut Criterion) {
  function hit_vector (line 26) | fn hit_vector(c: &mut Criterion) {

FILE: examples/aobench/benches/isec_plane.rs
  function hit_scalar (line 10) | fn hit_scalar(c: &mut Criterion) {
  function miss_scalar (line 53) | fn miss_scalar(c: &mut Criterion) {
  function hit_vector (line 96) | fn hit_vector(c: &mut Criterion) {
  function miss_vector (line 139) | fn miss_vector(c: &mut Criterion) {

FILE: examples/aobench/benches/isec_sphere.rs
  function hit_scalar (line 9) | fn hit_scalar(c: &mut Criterion) {
  function miss_scalar (line 49) | fn miss_scalar(c: &mut Criterion) {
  function hit_vector (line 88) | fn hit_vector(c: &mut Criterion) {
  function miss_vector (line 127) | fn miss_vector(c: &mut Criterion) {

FILE: examples/aobench/benches/random.rs
  function random_scalar (line 8) | fn random_scalar(c: &mut Criterion) {
  function random_vector (line 21) | fn random_vector(c: &mut Criterion) {

FILE: examples/aobench/benches/scanlines.rs
  function scanlines_scalar (line 6) | fn scanlines_scalar(b: &mut Bencher) {
  function scanlines_vector (line 22) | fn scanlines_vector(b: &mut Bencher) {

FILE: examples/aobench/build.rs
  function main (line 1) | fn main() {

FILE: examples/aobench/src/ambient_occlusion.rs
  function scalar (line 10) | pub fn scalar<S: Scene>(scene: &mut S, isect: &Isect) -> f32 {
  function vector (line 49) | pub fn vector<S: Scene>(scene: &mut S, isect: &Isect) -> f32 {
  function vector_tiled (line 92) | pub fn vector_tiled<S: Scene>(scene: &mut S, isect: &IsectxN) -> f32xN {
  function sanity_hit (line 134) | fn sanity_hit() {
  function sanity_miss (line 161) | fn sanity_miss() {

FILE: examples/aobench/src/geometry/mod.rs
  type f32xN (line 22) | pub type f32xN = f32x8;
  type u32xN (line 24) | pub type u32xN = u32x8;
  type usizexN (line 26) | pub type usizexN = usizex8;
  type m32xN (line 28) | pub type m32xN = m32x8;
  type pf32xN (line 30) | pub type pf32xN = Simd<[*mut f32; 8]>;
  type f32xN (line 33) | pub type f32xN = f32x4;
  type u32xN (line 35) | pub type u32xN = u32x4;
  type usizexN (line 37) | pub type usizexN = usizex4;
  type m32xN (line 39) | pub type m32xN = m32x4;
  type pf32xN (line 41) | pub type pf32xN = Simd<[*mut f32; 4]>;
  type IncrV (line 43) | pub trait IncrV {
    method incr (line 45) | fn incr(x: Self::Element, step: Self::Element) -> Self;
    type Element (line 49) | type Element = f32;
    method incr (line 51) | fn incr(x: f32, step: f32) -> Self {
    type Element (line 78) | type Element = u32;
    method incr (line 80) | fn incr(x: u32, step: u32) -> Self {
    type Element (line 102) | type Element = usize;
    method incr (line 104) | fn incr(x: usize, step: usize) -> Self {

FILE: examples/aobench/src/geometry/plane.rs
  type Plane (line 6) | pub struct Plane {

FILE: examples/aobench/src/geometry/ray.rs
  type Ray (line 7) | pub struct Ray {

FILE: examples/aobench/src/geometry/rayxN.rs
  type RayxN (line 7) | pub struct RayxN {
    method get (line 13) | pub fn get(&self, idx: usize) -> Ray {

FILE: examples/aobench/src/geometry/sphere.rs
  type Sphere (line 6) | pub struct Sphere {

FILE: examples/aobench/src/geometry/vec.rs
  type V3D (line 6) | pub struct V3D {
    method cross (line 29) | pub fn cross(self, o: Self) -> Self {
    method normalized (line 38) | pub fn normalized(self) -> Self {
    method ortho_basis (line 45) | pub fn ortho_basis(self) -> M3x3 {
    method almost_eq (line 66) | pub fn almost_eq(&self, rhs: &Self) -> bool {
    type Output (line 110) | type Output = Self;
    method mul (line 112) | fn mul(self, o: f32) -> Self::Output {
    type Output (line 160) | type Output = f32;
    method dot (line 162) | fn dot(self, o: Self) -> Self::Output {
  method default (line 15) | fn default() -> Self {
  type M3x3 (line 24) | pub type M3x3 = [V3D; 3];
    type Output (line 130) | type Output = V3D;
    method mul (line 132) | fn mul(self, o: V3D) -> Self::Output {
  type Output (line 75) | type Output = Self;
  method add (line 77) | fn add(self, o: Self) -> Self::Output {
  type Output (line 87) | type Output = Self;
  method sub (line 89) | fn sub(self, o: Self) -> Self::Output {
  type Output (line 99) | type Output = Self;
  method mul (line 100) | fn mul(self, o: Self) -> Self::Output {
  type Output (line 122) | type Output = V3D;
  function mul (line 124) | fn mul(self, o: V3D) -> Self::Output {
  type Dot (line 154) | pub trait Dot<O> {
    method dot (line 156) | fn dot(self, _: O) -> Self::Output;

FILE: examples/aobench/src/geometry/vecxN.rs
  type V3DxN (line 8) | pub struct V3DxN {
    method normalized (line 29) | pub fn normalized(self) -> Self {
    method get (line 35) | pub fn get(&self, idx: usize) -> V3D {
    method ortho_basis (line 45) | pub fn ortho_basis(self) -> [Self; 3] {
    method cross (line 68) | pub fn cross(self, o: Self) -> Self {
    type Output (line 138) | type Output = Self;
    method sub (line 140) | fn sub(self, o: V3D) -> Self::Output {
    type Output (line 150) | type Output = f32xN;
    method dot (line 152) | fn dot(self, o: Self) -> Self::Output {
    type Output (line 158) | type Output = f32xN;
    method dot (line 160) | fn dot(self, o: V3D) -> Self::Output {
  method default (line 17) | fn default() -> Self {
  type Output (line 78) | type Output = Self;
  method add (line 80) | fn add(self, o: Self) -> Self::Output {
  type Output (line 90) | type Output = Self;
  method mul (line 92) | fn mul(self, o: Self) -> Self::Output {
  type Output (line 102) | type Output = V3DxN;
  method mul (line 104) | fn mul(self, o: V3DxN) -> Self::Output {
  type Output (line 114) | type Output = V3DxN;
  function mul (line 116) | fn mul(self, o: V3DxN) -> Self::Output {
  type Selectable (line 168) | pub trait Selectable<O, P> {
    method sel (line 170) | fn sel(self, a: O, b: P) -> Self::Output;
  type Output (line 174) | type Output = f32xN;
  method sel (line 176) | fn sel(self, a: f32xN, b: f32xN) -> f32xN {
  type Output (line 182) | type Output = V3DxN;
  method sel (line 184) | fn sel(self, a: V3DxN, b: V3DxN) -> V3DxN {
  type Output (line 194) | type Output = V3DxN;
  method sel (line 196) | fn sel(self, a: V3D, b: V3DxN) -> V3DxN {
  type Output (line 206) | type Output = V3DxN;
  method mul (line 208) | fn mul(self, o: V3DxN) -> Self::Output {

FILE: examples/aobench/src/image.rs
  type Image (line 9) | pub struct Image {
    method new (line 17) | pub fn new(width: usize, height: usize) -> Self {
    method size (line 27) | pub fn size(&self) -> (usize, usize) {
    method write_png (line 34) | pub fn write_png(

FILE: examples/aobench/src/intersection/mod.rs
  type Intersect (line 4) | pub trait Intersect<I> {
    method intersect (line 6) | fn intersect(&self, other: &I, isect: Self::Isect) -> Self::Isect;

FILE: examples/aobench/src/intersection/packet.rs
  type IsectxN (line 8) | pub struct IsectxN {
    method get (line 28) | pub fn get(&self, idx: usize) -> Isect {
  method default (line 17) | fn default() -> Self {

FILE: examples/aobench/src/intersection/ray_plane.rs
  type Isect (line 8) | type Isect = Isect;
  method intersect (line 10) | fn intersect(&self, plane: &Plane, mut isect: Isect) -> Isect {
  type Isect (line 34) | type Isect = IsectxN;
  method intersect (line 36) | fn intersect(&self, plane: &Plane, mut isect: IsectxN) -> IsectxN {
  function sanity (line 78) | fn sanity() {
  function bug (line 179) | fn bug() {

FILE: examples/aobench/src/intersection/ray_sphere.rs
  type Isect (line 8) | type Isect = Isect;
  method intersect (line 10) | fn intersect(&self, sphere: &Sphere, mut isect: Isect) -> Isect {
  type Isect (line 35) | type Isect = IsectxN;
  method intersect (line 37) | fn intersect(&self, sphere: &Sphere, mut isect: IsectxN) -> IsectxN {
  function sanity (line 84) | fn sanity() {

FILE: examples/aobench/src/intersection/single.rs
  type Isect (line 7) | pub struct Isect {
    method almost_eq (line 29) | pub fn almost_eq(&self, rhs: &Self) -> bool {
  method default (line 16) | fn default() -> Self {

FILE: examples/aobench/src/ispc_.rs
  function ao (line 7) | pub fn ao<S: Scene>(
  function ao_tasks (line 23) | pub fn ao_tasks<S: Scene>(

FILE: examples/aobench/src/main.rs
  type Opt (line 13) | struct Opt {
  constant ALGORITHMS (line 28) | const ALGORITHMS: &[&str] = &[
  function main (line 39) | fn main() {

FILE: examples/aobench/src/random.rs
  type RngT (line 17) | struct RngT(u32, u32, u32, u32);
    method from_seed (line 20) | fn from_seed(x: u32) -> Self {
    method gen_u32 (line 31) | pub fn gen_u32(&mut self) -> u32 {
    method gen (line 43) | pub fn gen(&mut self) -> f32 {
    method from_seed (line 83) | fn from_seed(x: u32xN) -> Self {
    method gen_u32 (line 95) | pub fn gen_u32(&mut self) -> u32xN {
    method gen (line 108) | pub fn gen(&mut self) -> f32xN {
  type RngH (line 52) | pub struct RngH {
    method gen (line 57) | pub fn gen(&mut self) -> f32 {
    method gen (line 124) | pub fn gen(&mut self) -> f32xN {
  function thread_rng (line 68) | pub fn thread_rng() -> RngH {
  type RngT (line 80) | struct RngT(u32xN, u32xN, u32xN, u32xN);
    method from_seed (line 20) | fn from_seed(x: u32) -> Self {
    method gen_u32 (line 31) | pub fn gen_u32(&mut self) -> u32 {
    method gen (line 43) | pub fn gen(&mut self) -> f32 {
    method from_seed (line 83) | fn from_seed(x: u32xN) -> Self {
    method gen_u32 (line 95) | pub fn gen_u32(&mut self) -> u32xN {
    method gen (line 108) | pub fn gen(&mut self) -> f32xN {
  type RngH (line 118) | pub struct RngH {
    method gen (line 57) | pub fn gen(&mut self) -> f32 {
    method gen (line 124) | pub fn gen(&mut self) -> f32xN {
  function thread_rng (line 135) | pub fn thread_rng() -> RngH {

FILE: examples/aobench/src/scalar.rs
  function ao (line 8) | pub fn ao<S: Scene>(

FILE: examples/aobench/src/scalar_parallel.rs
  function ao (line 9) | pub fn ao<S: Scene>(_: &mut S, nsubsamples: usize, img: &mut crate::Imag...

FILE: examples/aobench/src/scene/mod.rs
  type Scene (line 4) | pub trait Scene: Send + Sync + Default {
    constant NAO_SAMPLES (line 5) | const NAO_SAMPLES: usize;
    method rand (line 6) | fn rand(&mut self) -> f32;
    method plane (line 7) | fn plane(&self) -> &Plane;
    method spheres (line 8) | fn spheres(&self) -> &[Sphere];
    method rand_f32xN (line 9) | fn rand_f32xN(&mut self) -> (f32xN, f32xN) {

FILE: examples/aobench/src/scene/random.rs
  type Random (line 7) | pub struct Random {
  method default (line 13) | fn default() -> Self {
  constant NAO_SAMPLES (line 57) | const NAO_SAMPLES: usize = 8;
  method rand (line 59) | fn rand(&mut self) -> f32 {
  method plane (line 63) | fn plane(&self) -> &Plane {
  method spheres (line 67) | fn spheres(&self) -> &[Sphere] {
  method rand_f32xN (line 71) | fn rand_f32xN(&mut self) -> (f32xN, f32xN) {

FILE: examples/aobench/src/scene/test.rs
  type Test (line 8) | pub struct Test {
  method default (line 16) | fn default() -> Self {
  constant NAO_SAMPLES (line 71) | const NAO_SAMPLES: usize = 8;
  method rand (line 72) | fn rand(&mut self) -> f32 {
  method plane (line 82) | fn plane(&self) -> &Plane {
  method spheres (line 85) | fn spheres(&self) -> &[Sphere] {

FILE: examples/aobench/src/tiled.rs
  function ao_impl (line 10) | fn ao_impl<S: Scene>(

FILE: examples/aobench/src/tiled_parallel.rs
  function ao (line 9) | pub fn ao<S: Scene>(_: &mut S, nsubsamples: usize, img: &mut crate::Imag...

FILE: examples/aobench/src/vector.rs
  function ao_impl (line 10) | fn ao_impl<S: Scene>(

FILE: examples/aobench/src/vector_parallel.rs
  function ao (line 9) | pub fn ao<S: Scene>(_: &mut S, nsubsamples: usize, img: &mut crate::Imag...

FILE: examples/dot_product/src/lib.rs
  function test (line 11) | fn test<F: Fn(&[f32], &[f32]) -> f32>(f: F) {

FILE: examples/dot_product/src/scalar.rs
  function dot_prod (line 3) | pub fn dot_prod(a: &[f32], b: &[f32]) -> f32 {
  function test (line 10) | fn test() {

FILE: examples/dot_product/src/simd.rs
  function dot_prod (line 5) | pub fn dot_prod(a: &[f32], b: &[f32]) -> f32 {
  function test (line 19) | fn test() {

FILE: examples/fannkuch_redux/src/lib.rs
  function fannkuch_redux (line 17) | pub fn fannkuch_redux(n: usize, alg: usize) -> (i32, i32) {

FILE: examples/fannkuch_redux/src/main.rs
  function run (line 5) | fn run<O: std::io::Write>(o: &mut O, n: usize, alg: usize) {
  function main (line 10) | fn main() {
  function verify_output_simd (line 28) | fn verify_output_simd() {
  function verify_output_scalar (line 45) | fn verify_output_scalar() {

FILE: examples/fannkuch_redux/src/scalar.rs
  function rotate (line 6) | fn rotate(x: &mut [i32]) {
  function next_permutation (line 13) | fn next_permutation(perm: &mut [i32], count: &mut [i32]) {
  type P (line 27) | struct P {
  type Perm (line 32) | struct Perm {
    method new (line 41) | fn new(n: u32) -> Self {
    method get (line 49) | fn get(&mut self, mut idx: i32) -> P {
    method count (line 77) | fn count(&self) -> u32 {
    method max (line 80) | fn max(&self) -> u32 {
    method next (line 84) | fn next(&mut self) -> P {
  function reverse (line 92) | fn reverse(tperm: &mut [i32], k: usize) {
  function work (line 96) | fn work(mut perm: Perm, n: usize, max: usize) -> (i32, i32) {
  function fannkuch_redux (line 120) | pub fn fannkuch_redux(n: usize) -> (i32, i32) {
  function test (line 146) | fn test() {

FILE: examples/fannkuch_redux/src/simd.rs
  type State (line 5) | struct State {
    method rotate_sisd (line 30) | fn rotate_sisd(&mut self, n: usize) {
    method popmasks (line 37) | fn popmasks(&mut self) {
    method rotate (line 57) | fn rotate(&mut self, n: usize) {
    method load_s (line 63) | fn load_s(&self) -> u8x16 {
    method tk (line 67) | fn tk(&mut self, n: usize) {
  method default (line 16) | fn default() -> Self {
  function fannkuch_redux (line 179) | pub fn fannkuch_redux(n: usize) -> (i32, i32) {
  function test (line 193) | fn test() {

FILE: examples/mandelbrot/build.rs
  function main (line 1) | fn main() {

FILE: examples/mandelbrot/src/ispc_tasks.rs
  function generate (line 7) | pub fn generate(dims: Dimensions, xr: Range, yr: Range) -> Vec<u32> {

FILE: examples/mandelbrot/src/lib.rs
  type Range (line 33) | type Range = ops::Range<f64>;
  type Region (line 34) | type Region = (Range, Range);
  type Dimensions (line 37) | pub type Dimensions = (usize, usize);
  type Algorithm (line 41) | pub enum Algorithm {
  type Mandelbrot (line 50) | pub struct Mandelbrot {
    method generate (line 57) | pub fn generate(dims: Dimensions, algo: Algorithm) -> Self {
    method generate_region (line 63) | pub fn generate_region(
    method write_header (line 83) | fn write_header(
    method output_pbm (line 95) | pub fn output_pbm(&self, f: &mut dyn io::Write) -> io::Result<()> {
    method output_ppm (line 120) | pub fn output_ppm(&self, f: &mut dyn io::Write) -> io::Result<()> {
  constant DEFAULT_REGION (line 167) | const DEFAULT_REGION: (Range, Range) = (-1.5..0.5, -1.0..1.0);
  constant THRESHOLD (line 173) | const THRESHOLD: f64 = 4.0;
  constant ITER_LIMIT (line 179) | const ITER_LIMIT: u32 = 50;
  function verify_all (line 187) | fn verify_all() {
  function verify_algo (line 219) | fn verify_algo(algo: Algorithm) {
  function verify_output_scalar (line 249) | fn verify_output_scalar() {
  function verify_output_simd (line 255) | fn verify_output_simd() {

FILE: examples/mandelbrot/src/main.rs
  type Opt (line 15) | struct Opt {
  constant ALGORITHMS (line 30) | const ALGORITHMS: &[&str] = &["scalar", "simd", "ispc"];
  function main (line 32) | fn main() {

FILE: examples/mandelbrot/src/scalar_par.rs
  type Complex (line 8) | struct Complex {
    method diverged (line 16) | fn diverged(&self) -> bool {
  type MandelbrotIter (line 28) | struct MandelbrotIter {
    method new (line 37) | fn new(start: Complex) -> Self {
    method count (line 43) | fn count(mut self) -> u32 {
  type Item (line 57) | type Item = Complex;
  method next (line 61) | fn next(&mut self) -> Option<Complex> {
  function generate (line 78) | pub fn generate(dims: Dimensions, xr: Range, yr: Range) -> Vec<u32> {

FILE: examples/mandelbrot/src/simd_par.rs
  type u64s (line 7) | type u64s = u64x8;
  type u32s (line 8) | type u32s = u32x8;
  type f64s (line 9) | type f64s = f64x8;
  type m64s (line 10) | type m64s = m64x8;
  type Complex (line 15) | struct Complex {
    method undiverged (line 24) | fn undiverged(&self) -> m64s {
  type MandelbrotIter (line 36) | struct MandelbrotIter {
    method new (line 45) | fn new(start: Complex) -> Self {
    method count (line 55) | fn count(mut self) -> u32s {
  type Item (line 80) | type Item = Complex;
  method next (line 84) | fn next(&mut self) -> Option<Complex> {
  function generate (line 101) | pub fn generate(dims: Dimensions, xr: Range, yr: Range) -> Vec<u32> {

FILE: examples/matrix_inverse/src/lib.rs
  type Matrix4x4 (line 10) | pub struct Matrix4x4([[f32; 4]; 4]);
  function test (line 14) | fn test<F: Fn(Matrix4x4) -> Option<Matrix4x4>>(f: F) {

FILE: examples/matrix_inverse/src/scalar.rs
  function inv4x4 (line 6) | pub fn inv4x4(m: Matrix4x4) -> Option<Matrix4x4> {
  function test (line 149) | fn test() {

FILE: examples/matrix_inverse/src/simd.rs
  function inv4x4 (line 7) | pub fn inv4x4(m: Matrix4x4) -> Option<Matrix4x4> {
  function test (line 106) | fn test() {

FILE: examples/nbody/benches/algs.rs
  function simd (line 10) | fn simd(b: &mut Bencher) {
  function scalar (line 15) | fn scalar(b: &mut Bencher) {

FILE: examples/nbody/src/lib.rs
  function run (line 14) | pub fn run(n: usize, alg: usize) -> (f64, f64) {
  constant RESULTS (line 23) | const RESULTS: &[(usize, &str, &str)] =

FILE: examples/nbody/src/main.rs
  function run (line 6) | fn run<O: std::io::Write>(o: &mut O, n: usize, alg: usize) {
  function main (line 13) | fn main() {
  function verify_output_simd (line 34) | fn verify_output_simd() {
  function verify_output_scalar (line 51) | fn verify_output_scalar() {

FILE: examples/nbody/src/scalar.rs
  constant SOLAR_MASS (line 8) | const SOLAR_MASS: f64 = 4.0 * PI * PI;
  constant DAYS_PER_YEAR (line 9) | const DAYS_PER_YEAR: f64 = 365.24;
  type Body (line 11) | struct Body {
  constant N_BODIES (line 17) | const N_BODIES: usize = 5;
  constant BODIES (line 19) | const BODIES: [Body; N_BODIES] = [
  function advance (line 80) | fn advance(bodies: &mut [Body; N_BODIES], dt: f64) {
  function energy (line 112) | fn energy(bodies: &[Body; N_BODIES]) -> f64 {
  function offset_momentum (line 133) | fn offset_momentum(bodies: &mut [Body; N_BODIES]) {
  function shift_mut_ref (line 149) | fn shift_mut_ref<'a, T>(r: &mut &'a mut [T]) -> Option<&'a mut T> {
  function run (line 159) | pub fn run(n: usize) -> (f64, f64) {
  function test (line 173) | fn test() {

FILE: examples/nbody/src/simd.rs
  constant SOLAR_MASS (line 6) | const SOLAR_MASS: f64 = 4.0 * PI * PI;
  constant DAYS_PER_YEAR (line 7) | const DAYS_PER_YEAR: f64 = 365.24;
  type Body (line 9) | pub struct Body {
  constant N_BODIES (line 14) | const N_BODIES: usize = 5;
  constant BODIES (line 16) | const BODIES: [Body; N_BODIES] = [
  function offset_momentum (line 89) | pub fn offset_momentum(bodies: &mut [Body; N_BODIES]) {
  function energy (line 98) | pub fn energy(bodies: &[Body; N_BODIES]) -> f64 {
  function advance (line 111) | pub fn advance(bodies: &mut [Body; N_BODIES], dt: f64) {
  function run_k (line 149) | pub fn run_k<K>(n: usize, k: K) -> (f64, f64)
  function run (line 164) | pub fn run(n: usize) -> (f64, f64) {
  function test (line 171) | fn test() {

FILE: examples/options_pricing/build.rs
  function main (line 1) | fn main() {

FILE: examples/options_pricing/src/ispc_.rs
  function serial (line 9) | pub fn serial(
  function tasks (line 26) | pub fn tasks(
  function serial (line 47) | pub fn serial(
  function tasks (line 64) | pub fn tasks(
  function black_scholes (line 86) | fn black_scholes() {
  function binomial_put (line 99) | fn binomial_put() {

FILE: examples/options_pricing/src/lib.rs
  constant BINOMIAL_NUM (line 17) | const BINOMIAL_NUM: usize = 64;
  type State (line 28) | pub struct State {
    method new (line 39) | pub fn new(count: usize) -> Self {
    method exec (line 50) | pub fn exec<F>(&mut self, model: F) -> f64
  function almost_equal (line 75) | fn almost_equal(a: f64, b: f64, max_rel_diff: f64) -> bool {

FILE: examples/options_pricing/src/main.rs
  function run (line 7) | fn run<F>(name: &str, count: usize, f: F)
  function main (line 29) | fn main() {

FILE: examples/options_pricing/src/scalar.rs
  function cnd (line 5) | fn cnd(x: f32) -> f32 {
  function black_scholes (line 27) | pub fn black_scholes(
  function binomial_put (line 44) | pub fn binomial_put(
  function black_scholes_ispc (line 87) | fn black_scholes_ispc() {
  function binomial_put_ispc (line 100) | fn binomial_put_ispc() {

FILE: examples/options_pricing/src/simd.rs
  function serial (line 5) | pub fn serial<K>(
  function black_scholes (line 27) | pub fn black_scholes(
  function binomial_put (line 43) | pub fn binomial_put(
  function black_scholes_scalar (line 64) | fn black_scholes_scalar() {
  function binomial_put_scalar (line 77) | fn binomial_put_scalar() {

FILE: examples/options_pricing/src/simd_kernels.rs
  function cnd (line 5) | pub fn cnd(x: f32s) -> f32s {
  function black_scholes (line 24) | pub fn black_scholes(s: f32s, x: f32s, t: f32s, r: f32s, v: f32s) -> f32s {
  function binomial_put (line 31) | pub fn binomial_put(s: f32s, x: f32s, t: f32s, r: f32s, v: f32s) -> f32s {

FILE: examples/options_pricing/src/simd_par.rs
  function parallel (line 5) | pub fn parallel<K>(
  function black_scholes (line 31) | pub fn black_scholes(
  function binomial_put (line 47) | pub fn binomial_put(
  function black_scholes_scalar (line 68) | fn black_scholes_scalar() {
  function binomial_put_scalar (line 81) | fn binomial_put_scalar() {

FILE: examples/options_pricing/src/sum.rs
  function slice (line 5) | pub fn slice(x: &[f32]) -> f64 {
  function slice_scalar (line 20) | pub fn slice_scalar(x: &[f32]) -> f64 {

FILE: examples/slice_sum/src/main.rs
  function init (line 6) | fn init(n: usize) -> Vec<f32> {
  function sum_ver (line 12) | fn sum_ver(x: &[f32]) -> f32 {
  function sum_hor (line 21) | fn sum_hor(x: &[f32]) -> f32 {
  function sum_ver_par (line 30) | fn sum_ver_par(x: &[f32]) -> f32 {
  function main (line 58) | fn main() {

FILE: examples/spectral_norm/src/lib.rs
  function A (line 12) | fn A(i: usize, j: usize) -> f64 {
  function spectral_norm (line 16) | pub fn spectral_norm(n: usize, alg: usize) -> f64 {

FILE: examples/spectral_norm/src/main.rs
  function run (line 4) | fn run<O: std::io::Write>(o: &mut O, n: usize, alg: usize) {
  function main (line 9) | fn main() {
  function verify_output_simd (line 27) | fn verify_output_simd() {
  function verify_output_scalar (line 44) | fn verify_output_scalar() {

FILE: examples/spectral_norm/src/scalar.rs
  type f64x2 (line 9) | struct f64x2(f64, f64);
  type Output (line 11) | type Output = Self;
  method add (line 12) | fn add(self, rhs: Self) -> Self {
  type Output (line 17) | type Output = Self;
  method div (line 18) | fn div(self, rhs: Self) -> Self {
  function spectral_norm (line 23) | pub fn spectral_norm(n: usize) -> f64 {
  function mult_AtAv (line 35) | fn mult_AtAv(v: &[f64], out: &mut [f64], tmp: &mut [f64]) {
  function mult_Av (line 40) | fn mult_Av(v: &[f64], out: &mut [f64]) {
  function mult_Atv (line 44) | fn mult_Atv(v: &[f64], out: &mut [f64]) {
  function mult (line 48) | fn mult<F>(v: &[f64], out: &mut [f64], start: usize, a: F)
  function dot (line 64) | fn dot(x: &[f64], y: &[f64]) -> f64 {
  function test (line 70) | fn test() {

FILE: examples/spectral_norm/src/simd.rs
  function mult_Av (line 6) | fn mult_Av(v: &[f64], out: &mut [f64]) {
  function mult_Atv (line 24) | fn mult_Atv(v: &[f64], out: &mut [f64]) {
  function mult_AtAv (line 42) | fn mult_AtAv(v: &[f64], out: &mut [f64], tmp: &mut [f64]) {
  function spectral_norm (line 47) | pub fn spectral_norm(n: usize) -> f64 {
  function dot (line 61) | fn dot(x: &[f64], y: &[f64]) -> f64 {
  function test (line 68) | fn test() {

FILE: examples/stencil/build.rs
  function main (line 1) | fn main() {

FILE: examples/stencil/src/ispc_loops.rs
  function serial (line 6) | pub fn serial(
  function tasks (line 32) | pub fn tasks(

FILE: examples/stencil/src/lib.rs
  type Data (line 23) | pub struct Data {
    method default (line 35) | pub fn default() -> Self {
    method benchmark (line 39) | pub fn benchmark() -> Self {
    method from_bounds (line 43) | pub fn from_bounds(
    method new (line 55) | pub fn new(
    method reinit (line 75) | pub fn reinit(&mut self) {
    method exec (line 99) | pub fn exec<F>(&mut self, f: F)
  function assert_data_eq (line 116) | fn assert_data_eq(a: &Data, b: &Data) {

FILE: examples/stencil/src/main.rs
  function run (line 8) | fn run<F>(name: &str, f: F)
  function main (line 18) | fn main() {

FILE: examples/stencil/src/scalar.rs
  function step (line 3) | pub fn step(
  function scalar (line 43) | pub fn scalar(
  function scalar_ispc_verify (line 71) | fn scalar_ispc_verify() {

FILE: examples/stencil/src/simd.rs
  function step_x8 (line 6) | pub(crate) fn step_x8(
  function x8_impl (line 72) | fn x8_impl(
  function x8_impl_avx2 (line 110) | unsafe fn x8_impl_avx2(
  function x8_impl_avx (line 122) | unsafe fn x8_impl_avx(
  function x8_impl_sse42 (line 134) | unsafe fn x8_impl_sse42(
  function x8_impl_sse2 (line 146) | unsafe fn x8_impl_sse2(
  function x8_impl_def (line 156) | unsafe fn x8_impl_def(
  function x8 (line 166) | pub fn x8(
  function simd_scalar_verify (line 212) | fn simd_scalar_verify() {
  function simd_ispc_verify (line 224) | fn simd_ispc_verify() {

FILE: examples/stencil/src/simd_par.rs
  function x8_par_impl (line 6) | fn x8_par_impl(
  function x8_par_impl_avx2 (line 47) | unsafe fn x8_par_impl_avx2(
  function x8_par_impl_avx (line 60) | unsafe fn x8_par_impl_avx(
  function x8_par_impl_sse42 (line 73) | unsafe fn x8_par_impl_sse42(
  function x8_par_impl_sse2 (line 86) | unsafe fn x8_par_impl_sse2(
  function x8_par_impl_def (line 97) | unsafe fn x8_par_impl_def(
  function x8_par (line 108) | pub fn x8_par(
  function simd_par_verify (line 153) | fn simd_par_verify() {

FILE: examples/stencil/volta/stencil.cpp
  function InitData (line 53) | void InitData(int Nx, int Ny, int Nz, float *A[2], float *vsq) {
  function main (line 65) | int main(int argc, char *argv[]) {

FILE: examples/stencil/volta/stencil_serial.cpp
  function stencil_step_serial (line 35) | static void
  function loop_stencil_serial (line 69) | void loop_stencil_serial(int t0, int t1,

FILE: examples/stencil/volta/tasksys.cpp
  type TaskInfo (line 193) | struct TaskInfo {
    method taskCount (line 201) | int taskCount() const { return taskCount3d[0]*taskCount3d[1]*taskCount...
    method taskIndex0 (line 202) | int taskIndex0() const
    method taskIndex1 (line 206) | int taskIndex1() const
    method taskIndex2 (line 210) | int taskIndex2() const
    method taskCount0 (line 214) | int taskCount0() const { return taskCount3d[0]; }
    method taskCount1 (line 215) | int taskCount1() const { return taskCount3d[1]; }
    method taskCount2 (line 216) | int taskCount2() const { return taskCount3d[2]; }
    method TaskInfo (line 217) | TaskInfo() { assert(sizeof(TaskInfo) % 32 == 0); }
  class TaskGroup (line 242) | class TaskGroup
    method TaskGroup (line 431) | TaskGroup() {
    method TaskGroup (line 448) | TaskGroup() {
    method Reset (line 454) | void Reset() {
  class TaskGroupBase (line 249) | class TaskGroupBase {
  function TaskInfo (line 325) | inline TaskInfo *
    method taskCount (line 201) | int taskCount() const { return taskCount3d[0]*taskCount3d[1]*taskCount...
    method taskIndex0 (line 202) | int taskIndex0() const
    method taskIndex1 (line 206) | int taskIndex1() const
    method taskIndex2 (line 210) | int taskIndex2() const
    method taskCount0 (line 214) | int taskCount0() const { return taskCount3d[0]; }
    method taskCount1 (line 215) | int taskCount1() const { return taskCount3d[1]; }
    method taskCount2 (line 216) | int taskCount2() const { return taskCount3d[2]; }
    method TaskInfo (line 217) | TaskInfo() { assert(sizeof(TaskInfo) % 32 == 0); }
  function lMemFence (line 373) | static inline void
  function lAtomicCompareAndSwap32 (line 393) | static int32_t
  function lAtomicAdd (line 404) | static inline int32_t
  class TaskGroup (line 417) | class TaskGroup : public TaskGroupBase {
    method TaskGroup (line 431) | TaskGroup() {
    method TaskGroup (line 448) | TaskGroup() {
    method Reset (line 454) | void Reset() {
  class TaskGroup (line 429) | class TaskGroup : public TaskGroupBase {
    method TaskGroup (line 431) | TaskGroup() {
    method TaskGroup (line 448) | TaskGroup() {
    method Reset (line 454) | void Reset() {
  class TaskGroup (line 446) | class TaskGroup : public TaskGroupBase {
    method TaskGroup (line 431) | TaskGroup() {
    method TaskGroup (line 448) | TaskGroup() {
    method Reset (line 454) | void Reset() {
  class TaskGroup (line 477) | class TaskGroup : public TaskGroupBase {
    method TaskGroup (line 431) | TaskGroup() {
    method TaskGroup (line 448) | TaskGroup() {
    method Reset (line 454) | void Reset() {
  class TaskGroup (line 488) | class TaskGroup : public TaskGroupBase {
    method TaskGroup (line 431) | TaskGroup() {
    method TaskGroup (line 448) | TaskGroup() {
    method Reset (line 454) | void Reset() {
  class TaskGroup (line 499) | class TaskGroup : public TaskGroupBase {
    method TaskGroup (line 431) | TaskGroup() {
    method TaskGroup (line 448) | TaskGroup() {
    method Reset (line 454) | void Reset() {
  class TaskGroup (line 510) | class TaskGroup : public TaskGroupBase {
    method TaskGroup (line 431) | TaskGroup() {
    method TaskGroup (line 448) | TaskGroup() {
    method Reset (line 454) | void Reset() {
  class TaskGroup (line 522) | class TaskGroup : public TaskGroupBase {
    method TaskGroup (line 431) | TaskGroup() {
    method TaskGroup (line 448) | TaskGroup() {
    method Reset (line 454) | void Reset() {
  function InitTaskSystem (line 545) | static void
  function lRunTask (line 564) | static void
  function InitTaskSystem (line 601) | static void
  function lRunTask (line 607) | static void __cdecl
  function InitTaskSystem (line 737) | static void
  function InitTaskSystem (line 953) | static void
  function InitTaskSystem (line 982) | static void
  function InitTaskSystem (line 1018) | static void
  function InitTaskSystem (line 1048) | static void
  function InitTaskSystem (line 1082) | static void
  function TaskGroup (line 1111) | static inline TaskGroup *
    method TaskGroup (line 431) | TaskGroup() {
    method TaskGroup (line 448) | TaskGroup() {
    method Reset (line 454) | void Reset() {
  function FreeTaskGroup (line 1127) | static inline void
  function ISPCLaunch (line 1144) | void
  function ISPCSync (line 1170) | void
  type Task (line 1201) | struct Task {
    method noMoreWork (line 1211) | inline int  noMoreWork() { return taskIndex >= taskCount; }
    method nextJob (line 1215) | inline int  nextJob() { return lAtomicAdd(&taskIndex,1); }
    method numJobs (line 1216) | inline int  numJobs() { return taskCount; }
    method schedule (line 1217) | inline void schedule(int idx) { taskIndex = 0; numDone = 0; liveIndex ...
    method markOneDone (line 1219) | inline void markOneDone() { lAtomicAdd(&numDone,1); }
    method wait (line 1220) | inline void wait()
  class TaskSys (line 1237) | class TaskSys {
    type LiveTask (line 1239) | struct LiveTask
      method doneWithThis (line 1250) | inline void doneWithThis() { lAtomicAdd(&locks,-1); }
      method LiveTask (line 1251) | LiveTask() : active(0), locks(-1) {}
    method TaskSys (line 1266) | TaskSys() : nextScheduleIndex(0)
    method Task (line 1276) | inline Task *allocOne()
    method init (line 1290) | static inline void init()
    method schedule (line 1304) | inline void schedule(Task *t)
    method sync (line 1321) | void sync(Task *task)
  function ISPCLaunch (line 1413) | void ISPCLaunch(void **taskGroupPtr, void *func, void *data, int count)
  function ISPCSync (line 1423) | void ISPCSync(void *h)

FILE: examples/stencil/volta/timing.h
  function rdtsc (line 41) | __inline__ uint64_t rdtsc() {
  function rtc (line 58) | static inline double rtc(void)
  function rdtsc (line 76) | __inline__ uint64_t rdtsc() {
  function rtc (line 90) | static inline double rtc(void)
  function reset_and_start_timer (line 108) | static inline void reset_and_start_timer()
  function get_elapsed_mcycles (line 119) | static inline double get_elapsed_mcycles()
  function get_elapsed_msec (line 127) | static inline double get_elapsed_msec()

FILE: examples/triangle_xform/src/lib.rs
  type Matrix (line 6) | type Matrix = [[f32; 3]; 4];
  constant TRIANGLE_COUNT (line 18) | const TRIANGLE_COUNT: usize = 1 << 5;
  function compare_scalar_simd (line 21) | fn compare_scalar_simd() {

FILE: examples/triangle_xform/src/scalar.rs
  type Vertex (line 4) | pub type Vertex = [f32; 3];
  type Triangle (line 8) | pub struct Triangle(pub [Vertex; 3]);
    method transform (line 13) | pub fn transform(self, mat: Matrix) -> Self {
  method sample (line 46) | fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> Triangle {
  function translate (line 52) | fn translate() {

FILE: examples/triangle_xform/src/simd.rs
  type VecF (line 4) | pub type VecF = packed_simd::f32x8;
  type Triangle (line 8) | pub struct Triangle {
    method pack (line 16) | pub fn pack(tris: &[crate::scalar::Triangle]) -> Self {
    method unpack (line 44) | pub fn unpack(self) -> Vec<crate::scalar::Triangle> {
    method transform (line 61) | pub fn transform(self, mat: Matrix) -> Self {

FILE: micro_benchmarks/benches/mask_reductions.rs
  constant NO_ITERATIONS (line 9) | const NO_ITERATIONS: u32 = 1_000;

FILE: src/api/cast.rs
  type FromCast (line 32) | pub trait FromCast<T>: crate::marker::Sized {
    method from_cast (line 34) | fn from_cast(_: T) -> Self;
  type Cast (line 65) | pub trait Cast<T>: crate::marker::Sized {
    method cast (line 67) | fn cast(self) -> T;
  method cast (line 76) | fn cast(self) -> U {
  method from_cast (line 84) | fn from_cast(t: Self) -> Self {

FILE: src/api/into_bits.rs
  type FromBits (line 5) | pub trait FromBits<T>: crate::marker::Sized {
    method from_bits (line 7) | fn from_bits(t: T) -> Self;
  type IntoBits (line 12) | pub trait IntoBits<T>: crate::marker::Sized {
    method into_bits (line 14) | fn into_bits(self) -> T;
  method into_bits (line 23) | fn into_bits(self) -> U {
  method from_bits (line 32) | fn from_bits(t: Self) -> Self {

FILE: src/codegen/bit_manip.rs
  function ctlz_u8x2 (line 9) | fn ctlz_u8x2(x: u8x2, is_zero_undef: bool) -> u8x2;
  function ctlz_u8x4 (line 11) | fn ctlz_u8x4(x: u8x4, is_zero_undef: bool) -> u8x4;
  function ctlz_u8x8 (line 13) | fn ctlz_u8x8(x: u8x8, is_zero_undef: bool) -> u8x8;
  function ctlz_u8x16 (line 15) | fn ctlz_u8x16(x: u8x16, is_zero_undef: bool) -> u8x16;
  function ctlz_u8x32 (line 17) | fn ctlz_u8x32(x: u8x32, is_zero_undef: bool) -> u8x32;
  function ctlz_u8x64 (line 19) | fn ctlz_u8x64(x: u8x64, is_zero_undef: bool) -> u8x64;
  function ctlz_u16x2 (line 22) | fn ctlz_u16x2(x: u16x2, is_zero_undef: bool) -> u16x2;
  function ctlz_u16x4 (line 24) | fn ctlz_u16x4(x: u16x4, is_zero_undef: bool) -> u16x4;
  function ctlz_u16x8 (line 26) | fn ctlz_u16x8(x: u16x8, is_zero_undef: bool) -> u16x8;
  function ctlz_u16x16 (line 28) | fn ctlz_u16x16(x: u16x16, is_zero_undef: bool) -> u16x16;
  function ctlz_u16x32 (line 30) | fn ctlz_u16x32(x: u16x32, is_zero_undef: bool) -> u16x32;
  function ctlz_u32x2 (line 33) | fn ctlz_u32x2(x: u32x2, is_zero_undef: bool) -> u32x2;
  function ctlz_u32x4 (line 35) | fn ctlz_u32x4(x: u32x4, is_zero_undef: bool) -> u32x4;
  function ctlz_u32x8 (line 37) | fn ctlz_u32x8(x: u32x8, is_zero_undef: bool) -> u32x8;
  function ctlz_u32x16 (line 39) | fn ctlz_u32x16(x: u32x16, is_zero_undef: bool) -> u32x16;
  function ctlz_u64x2 (line 42) | fn ctlz_u64x2(x: u64x2, is_zero_undef: bool) -> u64x2;
  function ctlz_u64x4 (line 44) | fn ctlz_u64x4(x: u64x4, is_zero_undef: bool) -> u64x4;
  function ctlz_u64x8 (line 46) | fn ctlz_u64x8(x: u64x8, is_zero_undef: bool) -> u64x8;
  function ctlz_u128x1 (line 49) | fn ctlz_u128x1(x: u128x1, is_zero_undef: bool) -> u128x1;
  function ctlz_u128x2 (line 51) | fn ctlz_u128x2(x: u128x2, is_zero_undef: bool) -> u128x2;
  function ctlz_u128x4 (line 53) | fn ctlz_u128x4(x: u128x4, is_zero_undef: bool) -> u128x4;
  function cttz_u8x2 (line 56) | fn cttz_u8x2(x: u8x2, is_zero_undef: bool) -> u8x2;
  function cttz_u8x4 (line 58) | fn cttz_u8x4(x: u8x4, is_zero_undef: bool) -> u8x4;
  function cttz_u8x8 (line 60) | fn cttz_u8x8(x: u8x8, is_zero_undef: bool) -> u8x8;
  function cttz_u8x16 (line 62) | fn cttz_u8x16(x: u8x16, is_zero_undef: bool) -> u8x16;
  function cttz_u8x32 (line 64) | fn cttz_u8x32(x: u8x32, is_zero_undef: bool) -> u8x32;
  function cttz_u8x64 (line 66) | fn cttz_u8x64(x: u8x64, is_zero_undef: bool) -> u8x64;
  function cttz_u16x2 (line 69) | fn cttz_u16x2(x: u16x2, is_zero_undef: bool) -> u16x2;
  function cttz_u16x4 (line 71) | fn cttz_u16x4(x: u16x4, is_zero_undef: bool) -> u16x4;
  function cttz_u16x8 (line 73) | fn cttz_u16x8(x: u16x8, is_zero_undef: bool) -> u16x8;
  function cttz_u16x16 (line 75) | fn cttz_u16x16(x: u16x16, is_zero_undef: bool) -> u16x16;
  function cttz_u16x32 (line 77) | fn cttz_u16x32(x: u16x32, is_zero_undef: bool) -> u16x32;
  function cttz_u32x2 (line 80) | fn cttz_u32x2(x: u32x2, is_zero_undef: bool) -> u32x2;
  function cttz_u32x4 (line 82) | fn cttz_u32x4(x: u32x4, is_zero_undef: bool) -> u32x4;
  function cttz_u32x8 (line 84) | fn cttz_u32x8(x: u32x8, is_zero_undef: bool) -> u32x8;
  function cttz_u32x16 (line 86) | fn cttz_u32x16(x: u32x16, is_zero_undef: bool) -> u32x16;
  function cttz_u64x2 (line 89) | fn cttz_u64x2(x: u64x2, is_zero_undef: bool) -> u64x2;
  function cttz_u64x4 (line 91) | fn cttz_u64x4(x: u64x4, is_zero_undef: bool) -> u64x4;
  function cttz_u64x8 (line 93) | fn cttz_u64x8(x: u64x8, is_zero_undef: bool) -> u64x8;
  function cttz_u128x1 (line 96) | fn cttz_u128x1(x: u128x1, is_zero_undef: bool) -> u128x1;
  function cttz_u128x2 (line 98) | fn cttz_u128x2(x: u128x2, is_zero_undef: bool) -> u128x2;
  function cttz_u128x4 (line 100) | fn cttz_u128x4(x: u128x4, is_zero_undef: bool) -> u128x4;
  function ctpop_u8x2 (line 103) | fn ctpop_u8x2(x: u8x2) -> u8x2;
  function ctpop_u8x4 (line 105) | fn ctpop_u8x4(x: u8x4) -> u8x4;
  function ctpop_u8x8 (line 107) | fn ctpop_u8x8(x: u8x8) -> u8x8;
  function ctpop_u8x16 (line 109) | fn ctpop_u8x16(x: u8x16) -> u8x16;
  function ctpop_u8x32 (line 111) | fn ctpop_u8x32(x: u8x32) -> u8x32;
  function ctpop_u8x64 (line 113) | fn ctpop_u8x64(x: u8x64) -> u8x64;
  function ctpop_u16x2 (line 116) | fn ctpop_u16x2(x: u16x2) -> u16x2;
  function ctpop_u16x4 (line 118) | fn ctpop_u16x4(x: u16x4) -> u16x4;
  function ctpop_u16x8 (line 120) | fn ctpop_u16x8(x: u16x8) -> u16x8;
  function ctpop_u16x16 (line 122) | fn ctpop_u16x16(x: u16x16) -> u16x16;
  function ctpop_u16x32 (line 124) | fn ctpop_u16x32(x: u16x32) -> u16x32;
  function ctpop_u32x2 (line 127) | fn ctpop_u32x2(x: u32x2) -> u32x2;
  function ctpop_u32x4 (line 129) | fn ctpop_u32x4(x: u32x4) -> u32x4;
  function ctpop_u32x8 (line 131) | fn ctpop_u32x8(x: u32x8) -> u32x8;
  function ctpop_u32x16 (line 133) | fn ctpop_u32x16(x: u32x16) -> u32x16;
  function ctpop_u64x2 (line 136) | fn ctpop_u64x2(x: u64x2) -> u64x2;
  function ctpop_u64x4 (line 138) | fn ctpop_u64x4(x: u64x4) -> u64x4;
  function ctpop_u64x8 (line 140) | fn ctpop_u64x8(x: u64x8) -> u64x8;
  function ctpop_u128x1 (line 143) | fn ctpop_u128x1(x: u128x1) -> u128x1;
  function ctpop_u128x2 (line 145) | fn ctpop_u128x2(x: u128x2) -> u128x2;
  function ctpop_u128x4 (line 147) | fn ctpop_u128x4(x: u128x4) -> u128x4;
  type BitManip (line 150) | pub(crate) trait BitManip {
    method ctpop (line 151) | fn ctpop(self) -> Self;
    method ctlz (line 152) | fn ctlz(self) -> Self;
    method cttz (line 153) | fn cttz(self) -> Self;
    method ctpop (line 277) | fn ctpop(self) -> Self {
    method ctlz (line 283) | fn ctlz(self) -> Self {
    method cttz (line 289) | fn cttz(self) -> Self {
    method ctpop (line 303) | fn ctpop(self) -> Self {
    method ctlz (line 309) | fn ctlz(self) -> Self {
    method cttz (line 315) | fn cttz(self) -> Self {

FILE: src/codegen/llvm.rs
  function simd_shuffle (line 9) | fn simd_shuffle<T, I, U>(x: T, y: T, idx: I) -> U;
  function __shuffle_vector2 (line 14) | pub unsafe fn __shuffle_vector2<const IDX: [u32; 2], T, U>(x: T, y: T) -> U
  function __shuffle_vector4 (line 24) | pub unsafe fn __shuffle_vector4<const IDX: [u32; 4], T, U>(x: T, y: T) -> U
  function __shuffle_vector8 (line 34) | pub unsafe fn __shuffle_vector8<const IDX: [u32; 8], T, U>(x: T, y: T) -> U
  function __shuffle_vector16 (line 44) | pub unsafe fn __shuffle_vector16<const IDX: [u32; 16], T, U>(x: T, y: T)...
  function __shuffle_vector32 (line 54) | pub unsafe fn __shuffle_vector32<const IDX: [u32; 32], T, U>(x: T, y: T)...
  function __shuffle_vector64 (line 64) | pub unsafe fn __shuffle_vector64<const IDX: [u32; 64], T, U>(x: T, y: T)...
  function simd_eq (line 73) | pub(crate) fn simd_eq<T, U>(x: T, y: T) -> U;
  function simd_ne (line 74) | pub(crate) fn simd_ne<T, U>(x: T, y: T) -> U;
  function simd_lt (line 75) | pub(crate) fn simd_lt<T, U>(x: T, y: T) -> U;
  function simd_le (line 76) | pub(crate) fn simd_le<T, U>(x: T, y: T) -> U;
  function simd_gt (line 77) | pub(crate) fn simd_gt<T, U>(x: T, y: T) -> U;
  function simd_ge (line 78) | pub(crate) fn simd_ge<T, U>(x: T, y: T) -> U;
  function simd_insert (line 80) | pub(crate) fn simd_insert<T, U>(x: T, idx: u32, val: U) -> T;
  function simd_extract (line 81) | pub(crate) fn simd_extract<T, U>(x: T, idx: u32) -> U;
  function simd_cast (line 83) | pub(crate) fn simd_cast<T, U>(x: T) -> U;
  function simd_add (line 85) | pub(crate) fn simd_add<T>(x: T, y: T) -> T;
  function simd_sub (line 86) | pub(crate) fn simd_sub<T>(x: T, y: T) -> T;
  function simd_mul (line 87) | pub(crate) fn simd_mul<T>(x: T, y: T) -> T;
  function simd_div (line 88) | pub(crate) fn simd_div<T>(x: T, y: T) -> T;
  function simd_rem (line 89) | pub(crate) fn simd_rem<T>(x: T, y: T) -> T;
  function simd_shl (line 90) | pub(crate) fn simd_shl<T>(x: T, y: T) -> T;
  function simd_shr (line 91) | pub(crate) fn simd_shr<T>(x: T, y: T) -> T;
  function simd_and (line 92) | pub(crate) fn simd_and<T>(x: T, y: T) -> T;
  function simd_or (line 93) | pub(crate) fn simd_or<T>(x: T, y: T) -> T;
  function simd_xor (line 94) | pub(crate) fn simd_xor<T>(x: T, y: T) -> T;
  function simd_reduce_add_unordered (line 96) | pub(crate) fn simd_reduce_add_unordered<T, U>(x: T) -> U;
  function simd_reduce_mul_unordered (line 97) | pub(crate) fn simd_reduce_mul_unordered<T, U>(x: T) -> U;
  function simd_reduce_add_ordered (line 98) | pub(crate) fn simd_reduce_add_ordered<T, U>(x: T, acc: U) -> U;
  function simd_reduce_mul_ordered (line 99) | pub(crate) fn simd_reduce_mul_ordered<T, U>(x: T, acc: U) -> U;
  function simd_reduce_min (line 100) | pub(crate) fn simd_reduce_min<T, U>(x: T) -> U;
  function simd_reduce_max (line 101) | pub(crate) fn simd_reduce_max<T, U>(x: T) -> U;
  function simd_reduce_min_nanless (line 102) | pub(crate) fn simd_reduce_min_nanless<T, U>(x: T) -> U;
  function simd_reduce_max_nanless (line 103) | pub(crate) fn simd_reduce_max_nanless<T, U>(x: T) -> U;
  function simd_reduce_and (line 104) | pub(crate) fn simd_reduce_and<T, U>(x: T) -> U;
  function simd_reduce_or (line 105) | pub(crate) fn simd_reduce_or<T, U>(x: T) -> U;
  function simd_reduce_xor (line 106) | pub(crate) fn simd_reduce_xor<T, U>(x: T) -> U;
  function simd_reduce_all (line 107) | pub(crate) fn simd_reduce_all<T>(x: T) -> bool;
  function simd_reduce_any (line 108) | pub(crate) fn simd_reduce_any<T>(x: T) -> bool;
  function simd_select (line 110) | pub(crate) fn simd_select<M, T>(m: M, a: T, b: T) -> T;
  function simd_fmin (line 112) | pub(crate) fn simd_fmin<T>(a: T, b: T) -> T;
  function simd_fmax (line 113) | pub(crate) fn simd_fmax<T>(a: T, b: T) -> T;
  function simd_fsqrt (line 115) | pub(crate) fn simd_fsqrt<T>(a: T) -> T;
  function simd_fma (line 116) | pub(crate) fn simd_fma<T>(a: T, b: T, c: T) -> T;
  function simd_gather (line 118) | pub(crate) fn simd_gather<T, P, M>(value: T, pointers: P, mask: M) -> T;
  function simd_scatter (line 119) | pub(crate) fn simd_scatter<T, P, M>(value: T, pointers: P, mask: M);
  function simd_bitmask (line 121) | pub(crate) fn simd_bitmask<T, U>(value: T) -> U;

FILE: src/codegen/math/float/abs.rs
  type Abs (line 8) | pub(crate) trait Abs {
    method abs (line 9) | fn abs(self) -> Self;
  function fabs_v2f32 (line 15) | fn fabs_v2f32(x: f32x2) -> f32x2;
  function fabs_v4f32 (line 17) | fn fabs_v4f32(x: f32x4) -> f32x4;
  function fabs_v8f32 (line 19) | fn fabs_v8f32(x: f32x8) -> f32x8;
  function fabs_v16f32 (line 21) | fn fabs_v16f32(x: f32x16) -> f32x16;
  function fabs_v2f64 (line 27) | fn fabs_v2f64(x: f64x2) -> f64x2;
  function fabs_v4f64 (line 29) | fn fabs_v4f64(x: f64x4) -> f64x4;
  function fabs_v8f64 (line 31) | fn fabs_v8f64(x: f64x8) -> f64x8;
  function fabs_f32 (line 34) | fn fabs_f32(x: f32) -> f32;
  function fabs_f64 (line 36) | fn fabs_f64(x: f64) -> f64;

FILE: src/codegen/math/float/cos.rs
  type Cos (line 8) | pub(crate) trait Cos {
    method cos (line 9) | fn cos(self) -> Self;
  function cos_v2f32 (line 15) | fn cos_v2f32(x: f32x2) -> f32x2;
  function cos_v4f32 (line 17) | fn cos_v4f32(x: f32x4) -> f32x4;
  function cos_v8f32 (line 19) | fn cos_v8f32(x: f32x8) -> f32x8;
  function cos_v16f32 (line 21) | fn cos_v16f32(x: f32x16) -> f32x16;
  function cos_v2f64 (line 27) | fn cos_v2f64(x: f64x2) -> f64x2;
  function cos_v4f64 (line 29) | fn cos_v4f64(x: f64x4) -> f64x4;
  function cos_v8f64 (line 31) | fn cos_v8f64(x: f64x8) -> f64x8;
  function cos_f32 (line 34) | fn cos_f32(x: f32) -> f32;
  function cos_f64 (line 36) | fn cos_f64(x: f64) -> f64;

FILE: src/codegen/math/float/cos_pi.rs
  type CosPi (line 8) | pub(crate) trait CosPi {
    method cos_pi (line 9) | fn cos_pi(self) -> Self;

FILE: src/codegen/math/float/exp.rs
  type Exp (line 8) | pub(crate) trait Exp {
    method exp (line 9) | fn exp(self) -> Self;
  function exp_v2f32 (line 15) | fn exp_v2f32(x: f32x2) -> f32x2;
  function exp_v4f32 (line 17) | fn exp_v4f32(x: f32x4) -> f32x4;
  function exp_v8f32 (line 19) | fn exp_v8f32(x: f32x8) -> f32x8;
  function exp_v16f32 (line 21) | fn exp_v16f32(x: f32x16) -> f32x16;
  function exp_v2f64 (line 27) | fn exp_v2f64(x: f64x2) -> f64x2;
  function exp_v4f64 (line 29) | fn exp_v4f64(x: f64x4) -> f64x4;
  function exp_v8f64 (line 31) | fn exp_v8f64(x: f64x8) -> f64x8;
  function exp_f32 (line 34) | fn exp_f32(x: f32) -> f32;
  function exp_f64 (line 36) | fn exp_f64(x: f64) -> f64;

FILE: src/codegen/math/float/ln.rs
  type Ln (line 8) | pub(crate) trait Ln {
    method ln (line 9) | fn ln(self) -> Self;
  function ln_v2f32 (line 15) | fn ln_v2f32(x: f32x2) -> f32x2;
  function ln_v4f32 (line 17) | fn ln_v4f32(x: f32x4) -> f32x4;
  function ln_v8f32 (line 19) | fn ln_v8f32(x: f32x8) -> f32x8;
  function ln_v16f32 (line 21) | fn ln_v16f32(x: f32x16) -> f32x16;
  function ln_v2f64 (line 27) | fn ln_v2f64(x: f64x2) -> f64x2;
  function ln_v4f64 (line 29) | fn ln_v4f64(x: f64x4) -> f64x4;
  function ln_v8f64 (line 31) | fn ln_v8f64(x: f64x8) -> f64x8;
  function ln_f32 (line 34) | fn ln_f32(x: f32) -> f32;
  function ln_f64 (line 36) | fn ln_f64(x: f64) -> f64;

FILE: src/codegen/math/float/mul_add.rs
  type MulAdd (line 7) | pub(crate) trait MulAdd {
    method mul_add (line 8) | fn mul_add(self, y: Self, z: Self) -> Self;
  function fma_v2f32 (line 15) | fn fma_v2f32(x: f32x2, y: f32x2, z: f32x2) -> f32x2;
  function fma_v4f32 (line 17) | fn fma_v4f32(x: f32x4, y: f32x4, z: f32x4) -> f32x4;
  function fma_v8f32 (line 19) | fn fma_v8f32(x: f32x8, y: f32x8, z: f32x8) -> f32x8;
  function fma_v16f32 (line 21) | fn fma_v16f32(x: f32x16, y: f32x16, z: f32x16) -> f32x16;
  function fma_v2f64 (line 27) | fn fma_v2f64(x: f64x2, y: f64x2, z: f64x2) -> f64x2;
  function fma_v4f64 (line 29) | fn fma_v4f64(x: f64x4, y: f64x4, z: f64x4) -> f64x4;
  function fma_v8f64 (line 31) | fn fma_v8f64(x: f64x8, y: f64x8, z: f64x8) -> f64x8;

FILE: src/codegen/math/float/mul_adde.rs
  type MulAddE (line 6) | pub(crate) trait MulAddE {
    method mul_adde (line 7) | fn mul_adde(self, y: Self, z: Self) -> Self;
  function fmuladd_v2f32 (line 14) | fn fmuladd_v2f32(x: f32x2, y: f32x2, z: f32x2) -> f32x2;
  function fmuladd_v4f32 (line 16) | fn fmuladd_v4f32(x: f32x4, y: f32x4, z: f32x4) -> f32x4;
  function fmuladd_v8f32 (line 18) | fn fmuladd_v8f32(x: f32x8, y: f32x8, z: f32x8) -> f32x8;
  function fmuladd_v16f32 (line 20) | fn fmuladd_v16f32(x: f32x16, y: f32x16, z: f32x16) -> f32x16;
  function fmuladd_v2f64 (line 26) | fn fmuladd_v2f64(x: f64x2, y: f64x2, z: f64x2) -> f64x2;
  function fmuladd_v4f64 (line 28) | fn fmuladd_v4f64(x: f64x4, y: f64x4, z: f64x4) -> f64x4;
  function fmuladd_v8f64 (line 30) | fn fmuladd_v8f64(x: f64x8, y: f64x8, z: f64x8) -> f64x8;

FILE: src/codegen/math/float/powf.rs
  type Powf (line 8) | pub(crate) trait Powf {
    method powf (line 9) | fn powf(self, x: Self) -> Self;
  function powf_v2f32 (line 15) | fn powf_v2f32(x: f32x2, y: f32x2) -> f32x2;
  function powf_v4f32 (line 17) | fn powf_v4f32(x: f32x4, y: f32x4) -> f32x4;
  function powf_v8f32 (line 19) | fn powf_v8f32(x: f32x8, y: f32x8) -> f32x8;
  function powf_v16f32 (line 21) | fn powf_v16f32(x: f32x16, y: f32x16) -> f32x16;
  function powf_v2f64 (line 27) | fn powf_v2f64(x: f64x2, y: f64x2) -> f64x2;
  function powf_v4f64 (line 29) | fn powf_v4f64(x: f64x4, y: f64x4) -> f64x4;
  function powf_v8f64 (line 31) | fn powf_v8f64(x: f64x8, y: f64x8) -> f64x8;
  function powf_f32 (line 34) | fn powf_f32(x: f32, y: f32) -> f32;
  function powf_f64 (line 36) | fn powf_f64(x: f64, y: f64) -> f64;

FILE: src/codegen/math/float/sin.rs
  type Sin (line 8) | pub(crate) trait Sin {
    method sin (line 9) | fn sin(self) -> Self;
  function sin_v2f32 (line 15) | fn sin_v2f32(x: f32x2) -> f32x2;
  function sin_v4f32 (line 17) | fn sin_v4f32(x: f32x4) -> f32x4;
  function sin_v8f32 (line 19) | fn sin_v8f32(x: f32x8) -> f32x8;
  function sin_v16f32 (line 21) | fn sin_v16f32(x: f32x16) -> f32x16;
  function sin_v2f64 (line 27) | fn sin_v2f64(x: f64x2) -> f64x2;
  function sin_v4f64 (line 29) | fn sin_v4f64(x: f64x4) -> f64x4;
  function sin_v8f64 (line 31) | fn sin_v8f64(x: f64x8) -> f64x8;
  function sin_f32 (line 34) | fn sin_f32(x: f32) -> f32;
  function sin_f64 (line 36) | fn sin_f64(x: f64) -> f64;

FILE: src/codegen/math/float/sin_cos_pi.rs
  type SinCosPi (line 8) | pub(crate) trait SinCosPi: Sized {
    method sin_cos_pi (line 10) | fn sin_cos_pi(self) -> Self::Output;

FILE: src/codegen/math/float/sin_pi.rs
  type SinPi (line 8) | pub(crate) trait SinPi {
    method sin_pi (line 9) | fn sin_pi(self) -> Self;

FILE: src/codegen/math/float/sqrt.rs
  type Sqrt (line 8) | pub(crate) trait Sqrt {
    method sqrt (line 9) | fn sqrt(self) -> Self;
  function sqrt_v2f32 (line 15) | fn sqrt_v2f32(x: f32x2) -> f32x2;
  function sqrt_v4f32 (line 17) | fn sqrt_v4f32(x: f32x4) -> f32x4;
  function sqrt_v8f32 (line 19) | fn sqrt_v8f32(x: f32x8) -> f32x8;
  function sqrt_v16f32 (line 21) | fn sqrt_v16f32(x: f32x16) -> f32x16;
  function sqrt_v2f64 (line 27) | fn sqrt_v2f64(x: f64x2) -> f64x2;
  function sqrt_v4f64 (line 29) | fn sqrt_v4f64(x: f64x4) -> f64x4;
  function sqrt_v8f64 (line 31) | fn sqrt_v8f64(x: f64x8) -> f64x8;
  function sqrt_f32 (line 34) | fn sqrt_f32(x: f32) -> f32;
  function sqrt_f64 (line 36) | fn sqrt_f64(x: f64) -> f64;

FILE: src/codegen/math/float/sqrte.rs
  type Sqrte (line 9) | pub(crate) trait Sqrte {
    method sqrte (line 10) | fn sqrte(self) -> Self;

FILE: src/codegen/math/float/tanh.rs
  type Tanh (line 11) | pub(crate) trait Tanh {
    method tanh (line 12) | fn tanh(self) -> Self;
  function tanh_f32 (line 48) | fn tanh_f32(x: f32) -> f32 {
  function tanh_f64 (line 52) | fn tanh_f64(x: f64) -> f64 {

FILE: src/codegen/reductions/mask.rs
  type All (line 10) | pub(crate) trait All: crate::marker::Sized {
    method all (line 11) | unsafe fn all(self) -> bool;
  type Any (line 14) | pub(crate) trait Any: crate::marker::Sized {
    method any (line 15) | unsafe fn any(self) -> bool;

FILE: src/codegen/shuffle.rs
  type Output (line 116) | type Output = crate::codegen::cptrx2<T>;
  type Output (line 120) | type Output = crate::codegen::cptrx4<T>;
  type Output (line 124) | type Output = crate::codegen::cptrx8<T>;
  type Output (line 129) | type Output = crate::codegen::mptrx2<T>;
  type Output (line 133) | type Output = crate::codegen::mptrx4<T>;
  type Output (line 137) | type Output = crate::codegen::mptrx8<T>;

FILE: src/codegen/shuffle1_dyn.rs
  type Shuffle1Dyn (line 5) | pub trait Shuffle1Dyn {
    method shuffle1_dyn (line 7) | fn shuffle1_dyn(self, _: Self::Indices) -> Self;

FILE: src/codegen/swap_bytes.rs
  type SwapBytes (line 8) | pub(crate) trait SwapBytes {
    method swap_bytes (line 9) | fn swap_bytes(self) -> Self;

FILE: src/lib.rs
  type Simd (line 289) | pub struct Simd<A: sealed::SimdArray>(
  type LexicographicallyOrdered (line 305) | pub struct LexicographicallyOrdered<T>(T);

FILE: src/sealed.rs
  type Seal (line 5) | pub trait Seal<T = ()> {}
  type SimdArray (line 8) | pub trait SimdArray: Seal {
    constant N (line 14) | const N: usize;
  type Shuffle (line 22) | pub trait Shuffle<Lanes>: Seal<Lanes> {
  type Simd (line 30) | pub trait Simd: Seal {
    constant LANES (line 34) | const LANES: usize;
  type Mask (line 40) | pub trait Mask: Seal {
    method test (line 41) | fn test(&self) -> bool;

FILE: src/testing/utils.rs
  function test_lt (line 10) | pub fn test_lt<T>(a: LexicographicallyOrdered<T>, b: LexicographicallyOr...
  function test_le (line 39) | pub fn test_le<T>(a: LexicographicallyOrdered<T>, b: LexicographicallyOr...
  function test_cmp (line 61) | pub fn test_cmp<T>(

FILE: tests/endianness.rs
  function endian_indexing (line 9) | fn endian_indexing() {
  function endian_bitcasts (line 19) | fn endian_bitcasts() {
  function endian_casts (line 36) | fn endian_casts() {
  function endian_load_and_stores (line 53) | fn endian_load_and_stores() {
  function endian_array_union (line 75) | fn endian_array_union() {
  function endian_tuple_access (line 142) | fn endian_tuple_access() {

FILE: verify/verify/src/api/math/float/mul_add.rs
  function fused_multiply_add (line 11) | unsafe fn fused_multiply_add(a: f32x4, b: f32x4, c: f32x4) -> f32x4 {
  function fused_multiply_sub (line 18) | unsafe fn fused_multiply_sub(a: f32x4, b: f32x4, c: f32x4) -> f32x4 {
  function fused_negate_multiply_add (line 25) | unsafe fn fused_negate_multiply_add(
  function fused_negate_multiply_sub (line 34) | unsafe fn fused_negate_multiply_sub(
  function fused_multiply_add_sub (line 43) | unsafe fn fused_multiply_add_sub(
  function fused_multiply_sub_add (line 55) | unsafe fn fused_multiply_sub_add(

FILE: verify/verify/src/api/ops/vector_rotates/x86.rs
  function rotate_right_variable (line 9) | unsafe fn rotate_right_variable(x: u64x8, v: u64x8) -> u64x8 {
  function rotate_left_variable (line 16) | unsafe fn rotate_left_variable(x: u64x8, v: u64x8) -> u64x8 {
  function rotate_right (line 23) | unsafe fn rotate_right(x: u64x8) -> u64x8 {
  function rotate_left (line 30) | unsafe fn rotate_left(x: u64x8) -> u64x8 {
  function rotate_left_x2 (line 37) | unsafe fn rotate_left_x2(x: u64x2) -> u64x2 {

Download .json

Condensed preview — 363 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (1,017K chars).

[
  {
    "path": ".appveyor.yml",
    "chars": 1778,
    "preview": "matrix:\n  allow_failures:\n    # FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/72\n    - TARGET: i686-pc-"
  },
  {
    "path": ".github/workflows/benchmarks.yml",
    "chars": 746,
    "preview": "name: benchmarks\n\non:\n  push:\n    branches:\n      - master\n  pull_request:\n  workflow_dispatch:\n\njobs:\n  x86_64-unknown-"
  },
  {
    "path": ".github/workflows/ci.yml",
    "chars": 6123,
    "preview": "name: ci\n\n# trigger for all PRs and changes to master\non:\n  push:\n    branches:\n      - master\n  pull_request:\n\njobs:\n  "
  },
  {
    "path": ".github/workflows/docs.yml",
    "chars": 191,
    "preview": "name: docs\n\non:\n  push:\n    branches:\n      - master\n\njobs:\n  docs:\n    uses: ./.github/workflows/run-ci-script.yml\n    "
  },
  {
    "path": ".github/workflows/run-ci-script.yml",
    "chars": 2367,
    "preview": "name: run-ci-script\n\non:\n  workflow_call:\n    inputs:\n      runner:\n        required: false\n        type: string\n       "
  },
  {
    "path": ".gitignore",
    "chars": 553,
    "preview": "Cargo.lock\ntarget/\n\n# llvm-ir and assembly\n*.ll\n*.d\n\n# png files output by benchmarks\n*.png\n\n# -*- mode: gitignore; -*-\n"
  },
  {
    "path": ".travis.yml",
    "chars": 8214,
    "preview": "language: rust\nrust: nightly\nos: linux\ndist: focal\n\nstages:\n  - tools\n  - build-test-verify # Passes full test suite, pe"
  },
  {
    "path": "Cargo.toml",
    "chars": 1409,
    "preview": "[package]\nname = \"packed_simd\"\nversion = \"0.3.9\"\ndescription = \"Portable Packed SIMD vectors\"\ndocumentation = \"https://d"
  },
  {
    "path": "LICENSE-APACHE",
    "chars": 10847,
    "preview": "                              Apache License\n                        Version 2.0, January 2004\n                     http"
  },
  {
    "path": "LICENSE-MIT",
    "chars": 1071,
    "preview": "Copyright (c) 2014 The Rust Project Developers\n\nPermission is hereby granted, free of charge, to any\nperson obtaining a "
  },
  {
    "path": "README.md",
    "chars": 6793,
    "preview": "# `Simd<[T; N]>`\n\n## Implementation of [Rust RFC #2366: `std::simd`][rfc2366]\n\n[![Latest Version]][crates.io] [![docs]]["
  },
  {
    "path": "bors.toml",
    "chars": 56,
    "preview": "status = [\n    \"continuous-integration/travis-ci/push\"\n]"
  },
  {
    "path": "build.rs",
    "chars": 195,
    "preview": "fn main() {\n    let target = std::env::var(\"TARGET\").expect(\"TARGET environment variable not defined\");\n    if target.co"
  },
  {
    "path": "ci/all.sh",
    "chars": 848,
    "preview": "#!/usr/bin/env bash\n#\n# Performs an operation on all targets\n\nset -ex\n\n: \"${1?The all.sh script requires one argument.}\""
  },
  {
    "path": "ci/android-install-ndk.sh",
    "chars": 735,
    "preview": "#!/usr/bin/env sh\n# Copyright 2016 The Rust Project Developers. See the COPYRIGHT\n# file at the top-level directory of t"
  },
  {
    "path": "ci/android-install-sdk.sh",
    "chars": 1664,
    "preview": "#!/usr/bin/env sh\n# Copyright 2016 The Rust Project Developers. See the COPYRIGHT\n# file at the top-level directory of t"
  },
  {
    "path": "ci/android-sysimage.sh",
    "chars": 1664,
    "preview": "#!/usr/bin/env bash\n\n# Copyright 2017 The Rust Project Developers. See the COPYRIGHT\n# file at the top-level directory o"
  },
  {
    "path": "ci/benchmark.sh",
    "chars": 821,
    "preview": "#!/usr/bin/env bash\n#\n# Runs all benchmarks. Controlled by the following environment variables:\n#\n# FEATURES={} - cargo "
  },
  {
    "path": "ci/deploy_and_run_on_ios_simulator.rs",
    "chars": 5815,
    "preview": "// Copyright 2017 The Rust Project Developers. See the COPYRIGHT\n// file at the top-level directory of this distribution"
  },
  {
    "path": "ci/docker/aarch64-linux-android/Dockerfile",
    "chars": 1230,
    "preview": "FROM ubuntu:16.04\n\nRUN dpkg --add-architecture i386 && \\\n    apt-get update && \\\n    apt-get install -y --no-install-rec"
  },
  {
    "path": "ci/docker/aarch64-unknown-linux-gnu/Dockerfile",
    "chars": 415,
    "preview": "FROM ubuntu:18.04\nRUN apt-get update && apt-get install -y --no-install-recommends \\\n  gcc \\\n  ca-certificates \\\n  libc6"
  },
  {
    "path": "ci/docker/arm-unknown-linux-gnueabi/Dockerfile",
    "chars": 463,
    "preview": "FROM ubuntu:18.04\nRUN apt-get update && apt-get install -y --no-install-recommends \\\n  gcc \\\n  ca-certificates \\\n  libc6"
  },
  {
    "path": "ci/docker/arm-unknown-linux-gnueabihf/Dockerfile",
    "chars": 422,
    "preview": "FROM ubuntu:18.04\nRUN apt-get update && apt-get install -y --no-install-recommends \\\n  gcc \\\n  ca-certificates \\\n  libc6"
  },
  {
    "path": "ci/docker/armv7-linux-androideabi/Dockerfile",
    "chars": 1222,
    "preview": "FROM ubuntu:16.04\n\nRUN dpkg --add-architecture i386 && \\\n    apt-get update && \\\n    apt-get install -y --no-install-rec"
  },
  {
    "path": "ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile",
    "chars": 426,
    "preview": "FROM ubuntu:18.04\nRUN apt-get update && apt-get install -y --no-install-recommends \\\n  gcc \\\n  ca-certificates \\\n  libc6"
  },
  {
    "path": "ci/docker/i586-unknown-linux-gnu/Dockerfile",
    "chars": 152,
    "preview": "FROM ubuntu:18.04\nRUN apt-get update && apt-get install -y --no-install-recommends \\\n  gcc-multilib \\\n  libc6-dev \\\n  fi"
  },
  {
    "path": "ci/docker/i686-unknown-linux-gnu/Dockerfile",
    "chars": 152,
    "preview": "FROM ubuntu:18.04\nRUN apt-get update && apt-get install -y --no-install-recommends \\\n  gcc-multilib \\\n  libc6-dev \\\n  fi"
  },
  {
    "path": "ci/docker/mips-unknown-linux-gnu/Dockerfile",
    "chars": 448,
    "preview": "FROM ubuntu:18.04\n\nRUN apt-get update && apt-get install -y --no-install-recommends \\\n        gcc libc6-dev qemu-user ca"
  },
  {
    "path": "ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile",
    "chars": 456,
    "preview": "FROM ubuntu:18.04\n\nRUN apt-get update && apt-get install -y --no-install-recommends \\\n        gcc libc6-dev qemu-user ca"
  },
  {
    "path": "ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile",
    "chars": 464,
    "preview": "FROM ubuntu:18.04\n\nRUN apt-get update && apt-get install -y --no-install-recommends \\\n        gcc libc6-dev qemu-user ca"
  },
  {
    "path": "ci/docker/mipsel-unknown-linux-musl/Dockerfile",
    "chars": 849,
    "preview": "FROM ubuntu:18.10\n\nRUN apt-get update && \\\n    apt-get install -y --no-install-recommends \\\n    ca-certificates \\\n    gc"
  },
  {
    "path": "ci/docker/powerpc-unknown-linux-gnu/Dockerfile",
    "chars": 489,
    "preview": "FROM ubuntu:22.04\n\nRUN apt-get update && apt-get install -y --no-install-recommends \\\n        gcc libc6-dev qemu-user ca"
  },
  {
    "path": "ci/docker/powerpc64-unknown-linux-gnu/Dockerfile",
    "chars": 499,
    "preview": "FROM ubuntu:22.04\n\nRUN apt-get update && apt-get install -y --no-install-recommends \\\n    gcc \\\n    ca-certificates \\\n  "
  },
  {
    "path": "ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile",
    "chars": 491,
    "preview": "FROM ubuntu:22.04\n\nRUN apt-get update && apt-get install -y --no-install-recommends \\\n        gcc libc6-dev qemu-user ca"
  },
  {
    "path": "ci/docker/s390x-unknown-linux-gnu/Dockerfile",
    "chars": 554,
    "preview": "FROM ubuntu:22.04\n\nRUN apt-get update && \\\n    apt-get install -y --no-install-recommends \\\n    ca-certificates \\\n    cu"
  },
  {
    "path": "ci/docker/sparc64-unknown-linux-gnu/Dockerfile",
    "chars": 603,
    "preview": "FROM debian:bookworm\n\nRUN apt-get update && apt-get install -y --no-install-recommends \\\n        curl ca-certificates \\\n"
  },
  {
    "path": "ci/docker/thumbv7neon-linux-androideabi/Dockerfile",
    "chars": 1238,
    "preview": "FROM ubuntu:16.04\n\nRUN dpkg --add-architecture i386 && \\\n    apt-get update && \\\n    apt-get install -y --no-install-rec"
  },
  {
    "path": "ci/docker/thumbv7neon-unknown-linux-gnueabihf/Dockerfile",
    "chars": 438,
    "preview": "FROM ubuntu:18.04\nRUN apt-get update && apt-get install -y --no-install-recommends \\\n  gcc \\\n  ca-certificates \\\n  libc6"
  },
  {
    "path": "ci/docker/wasm32-unknown-unknown/Dockerfile",
    "chars": 1345,
    "preview": "FROM ubuntu:22.04\n\nRUN apt-get update -y && apt-get install -y --no-install-recommends \\\n  ca-certificates \\\n  clang \\\n "
  },
  {
    "path": "ci/docker/x86_64-linux-android/Dockerfile",
    "chars": 892,
    "preview": "FROM ubuntu:20.04\n\nRUN apt-get update && \\\n    apt-get install -y --no-install-recommends \\\n  ca-certificates \\\n  curl \\"
  },
  {
    "path": "ci/docker/x86_64-unknown-linux-gnu/Dockerfile",
    "chars": 180,
    "preview": "FROM ubuntu:18.04\nRUN apt-get update && apt-get install -y --no-install-recommends \\\n  gcc \\\n  libc6-dev \\\n  file \\\n  ma"
  },
  {
    "path": "ci/docker/x86_64-unknown-linux-gnu-emulated/Dockerfile",
    "chars": 451,
    "preview": "FROM ubuntu:18.04\nRUN apt-get update && apt-get install -y --no-install-recommends \\\n  gcc \\\n  libc6-dev \\\n  file \\\n  ma"
  },
  {
    "path": "ci/dox.sh",
    "chars": 663,
    "preview": "#!/bin/sh\n\nset -ex\n\nrm -rf target/doc\nmkdir -p target/doc\n\n# Build API documentation\ncargo doc --features=into_bits\n\n# B"
  },
  {
    "path": "ci/linux-s390x.sh",
    "chars": 515,
    "preview": "set -ex\n\nmkdir -m 777 /qemu\ncd /qemu\n\ncurl -LO https://github.com/qemu/qemu/raw/master/pc-bios/s390-ccw.img\ncurl -LO htt"
  },
  {
    "path": "ci/linux-sparc64.sh",
    "chars": 433,
    "preview": "set -ex\n\nmkdir -m 777 /qemu\ncd /qemu\n\ncurl -LO https://cdimage.debian.org/cdimage/ports/9.0/sparc64/iso-cd/debian-9.0-sp"
  },
  {
    "path": "ci/lld-shim.rs",
    "chars": 282,
    "preview": "use std::os::unix::prelude::*;\nuse std::process::Command;\nuse std::env;\n\nfn main() {\n    let args = env::args()\n        "
  },
  {
    "path": "ci/max_line_width.sh",
    "chars": 281,
    "preview": "#!/usr/bin/env sh\n\nset -x\n\nexport success=true\n\nfind . -iname '*.rs' | while read -r file; do\n    result=$(grep '.\\{79\\}"
  },
  {
    "path": "ci/run-docker.sh",
    "chars": 976,
    "preview": "# Small script to run tests for a target (or all targets) inside all the\n# respective docker images.\n\nset -ex\n\nrun() {\n "
  },
  {
    "path": "ci/run.sh",
    "chars": 3275,
    "preview": "#!/usr/bin/env bash\n\nset -ex\n\n: ${TARGET?\"The TARGET environment variable must be set.\"}\n\n# Tests are all super fast any"
  },
  {
    "path": "ci/run_examples.sh",
    "chars": 1881,
    "preview": "# Runs all examples.\n\n# FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/55\n# All examples fail to build f"
  },
  {
    "path": "ci/runtest-android.rs",
    "chars": 1365,
    "preview": "use std::env;\nuse std::process::Command;\nuse std::path::{Path, PathBuf};\n\nfn main() {\n    let args = env::args_os()\n    "
  },
  {
    "path": "ci/setup_benchmarks.sh",
    "chars": 180,
    "preview": "#!/usr/bin/env bash\n\nset -ex\n\n# Get latest ISPC binary for the target and put it in the path\ngit clone https://github.co"
  },
  {
    "path": "ci/test-runner-linux",
    "chars": 454,
    "preview": "#!/bin/sh\n\nset -e\n\narch=$1\nprog=$2\n\ncd /qemu/init\ncp -f $2 prog\nfind . | cpio --create --format='newc' --quiet | gzip > "
  },
  {
    "path": "contributing.md",
    "chars": 2956,
    "preview": "# Contributing to `packed_simd`\n\nWelcome! If you are reading this document, it means you are interested in contributing\n"
  },
  {
    "path": "examples/Cargo.toml",
    "chars": 771,
    "preview": "# FIXME: Many members of this workspace, including aobench, mandelbrot, and stencil,\n# currently trigger a \"null pointer"
  },
  {
    "path": "examples/aobench/Cargo.toml",
    "chars": 1035,
    "preview": "[package]\nname = \"aobench\"\nversion = \"0.1.0\"\nauthors = [\"gnzlbg <gonzalobg88@gmail.com>\"]\nautobenches = false\nedition = "
  },
  {
    "path": "examples/aobench/benches/ambient_occlusion.rs",
    "chars": 1248,
    "preview": "//! Benchmarks intersection between rays and planes\n#![feature(stdsimd)]\n\nuse aobench_lib::*;\nuse criterion::*;\nuse inte"
  },
  {
    "path": "examples/aobench/benches/isec_plane.rs",
    "chars": 4368,
    "preview": "//! Benchmarks intersection between rays and planes\n#![feature(stdsimd)]\n\nuse criterion::*;\n\nuse crate::geometry::{f32xN"
  },
  {
    "path": "examples/aobench/benches/isec_sphere.rs",
    "chars": 4138,
    "preview": "//! Benchmarks intersection between rays and spheres\n#![feature(stdsimd)]\n\nuse crate::geometry::{f32xN, Ray, RayxN, Sphe"
  },
  {
    "path": "examples/aobench/benches/random.rs",
    "chars": 849,
    "preview": "//! Benchmarks PNRG\n#![feature(stdsimd)]\n\nuse aobench_lib::geometry::f32xN;\nuse aobench_lib::random;\nuse criterion::*;\n\n"
  },
  {
    "path": "examples/aobench/benches/scanlines.rs",
    "chars": 857,
    "preview": "#![feature(test)]\n\nuse test::{black_box, Bencher};\n\n#[bench]\nfn scanlines_scalar(b: &mut Bencher) {\n    let width = 50;\n"
  },
  {
    "path": "examples/aobench/benchmark.sh",
    "chars": 1237,
    "preview": "#!/usr/bin/env bash\n#\n# Runs aobench benchmarks\n\nset -ex\n\nexport WIDTH=800\nexport HEIGHT=600\n\nif [[ ${NORUN} != 1 ]]; th"
  },
  {
    "path": "examples/aobench/build.rs",
    "chars": 775,
    "preview": "fn main() {\n    println!(\"cargo:rerun-if-changed=build.rs\");\n\n    #[cfg(feature = \"ispc\")]\n    {\n        if std::env::va"
  },
  {
    "path": "examples/aobench/readme.md",
    "chars": 4228,
    "preview": "# Ambient Occlusion Benchmark\n\n> Originally written by Syoyo Fujita: https://github.com/syoyo/aobench\n\n`aoench` is a sma"
  },
  {
    "path": "examples/aobench/rustfmt.toml",
    "chars": 14,
    "preview": "max_width = 79"
  },
  {
    "path": "examples/aobench/src/ambient_occlusion.rs",
    "chars": 5448,
    "preview": "//! Ambient Occlusion implementations\n\nuse crate::geometry::{f32xN, Ray, RayxN, Selectable, V3DxN, V3D};\nuse crate::inte"
  },
  {
    "path": "examples/aobench/src/geometry/mod.rs",
    "chars": 2840,
    "preview": "//! Geometry utilities\n\nuse packed_simd::*;\n\nmod plane;\nmod ray;\nmod sphere;\nmod vec;\n\nmod rayxN;\nmod vecxN;\n\npub use se"
  },
  {
    "path": "examples/aobench/src/geometry/plane.rs",
    "chars": 121,
    "preview": "//! Plane\n\nuse crate::geometry::V3D;\n\n#[derive(Copy, Clone, Debug)]\npub struct Plane {\n    pub p: V3D,\n    pub n: V3D,\n}"
  },
  {
    "path": "examples/aobench/src/geometry/ray.rs",
    "chars": 175,
    "preview": "//! A ray\n\nuse crate::geometry::V3D;\n\n/// Ray starting at `origin` in `dir` direction.\n#[derive(Copy, Clone, Debug)]\npub"
  },
  {
    "path": "examples/aobench/src/geometry/rayxN.rs",
    "chars": 381,
    "preview": "//! Four packed rays\n\nuse crate::geometry::{Ray, V3DxN};\n\n/// Four packed rays starting at `origin` in `dir` direction.\n"
  },
  {
    "path": "examples/aobench/src/geometry/sphere.rs",
    "chars": 133,
    "preview": "//! Sphere\n\nuse crate::geometry::V3D;\n\n#[derive(Copy, Clone, Debug)]\npub struct Sphere {\n    pub center: V3D,\n    pub ra"
  },
  {
    "path": "examples/aobench/src/geometry/vec.rs",
    "chars": 3534,
    "preview": "//! A simple vector type\n\nuse std::ops::*;\n\n#[derive(Copy, Clone, Debug, PartialEq)]\npub struct V3D {\n    pub x: f32,\n  "
  },
  {
    "path": "examples/aobench/src/geometry/vecxN.rs",
    "chars": 5496,
    "preview": "//! A simple vector type\n\nuse std::ops::*;\n\nuse crate::geometry::{f32xN, m32xN, Dot, M3x3, V3D};\n\n#[derive(Copy, Clone, "
  },
  {
    "path": "examples/aobench/src/image.rs",
    "chars": 2341,
    "preview": "//! Image utilities\n\nuse failure::Error;\n#[allow(unused)]\nuse png::{BitDepth, ColorType, Encoder};\nuse std::path::Path;\n"
  },
  {
    "path": "examples/aobench/src/intersection/mod.rs",
    "chars": 295,
    "preview": "//! Intersection functions\n\n/// Intersection of `I` with `Self`\npub trait Intersect<I> {\n    type Isect;\n    fn intersec"
  },
  {
    "path": "examples/aobench/src/intersection/packet.rs",
    "chars": 734,
    "preview": "//! SIMD intersection result\n\nuse crate::geometry::{f32xN, m32xN, V3DxN};\nuse crate::intersection::Isect;\n\n/// Intersect"
  },
  {
    "path": "examples/aobench/src/intersection/ray_plane.rs",
    "chars": 6862,
    "preview": "//! Intersection of a ray with a plane\n\nuse crate::geometry::{f32xN, Dot, Plane, Ray, RayxN, Selectable};\nuse crate::int"
  },
  {
    "path": "examples/aobench/src/intersection/ray_sphere.rs",
    "chars": 5837,
    "preview": "//! Intersection of a ray with a sphere.\n\nuse crate::geometry::{f32xN, Dot, Ray, RayxN, Selectable, Sphere};\nuse crate::"
  },
  {
    "path": "examples/aobench/src/intersection/single.rs",
    "chars": 709,
    "preview": "//! Scalar intersection result\n\nuse crate::geometry::V3D;\n\n/// Intersection result\n#[derive(Copy, Clone, Debug)]\npub str"
  },
  {
    "path": "examples/aobench/src/ispc_.rs",
    "chars": 706,
    "preview": "//! Includes the ISPC implementations.\nuse crate::*;\nuse ispc::*;\n\nispc_module!(aobench);\n\npub fn ao<S: Scene>(\n    _sce"
  },
  {
    "path": "examples/aobench/src/lib.rs",
    "chars": 982,
    "preview": "//! aobench: Ambient Occlusion Renderer benchmark.\n//!\n//! Based on [aobench](https://code.google.com/archive/p/aobench/"
  },
  {
    "path": "examples/aobench/src/main.rs",
    "chars": 2798,
    "preview": "//! aobench: Ambient Occlusion Renderer benchmark.\n//!\n//! Based on [aobench](https://code.google.com/archive/p/aobench/"
  },
  {
    "path": "examples/aobench/src/random.rs",
    "chars": 4359,
    "preview": "//! Pseudo random number generators.\n//!\n//! Currently only `LFSR113` is implemented, since that is what ISPC uses, and "
  },
  {
    "path": "examples/aobench/src/scalar.rs",
    "chars": 2003,
    "preview": "//! Scalar serial aobench\n\nuse crate::ambient_occlusion;\nuse crate::geometry::{Ray, V3D};\nuse crate::intersection::{Inte"
  },
  {
    "path": "examples/aobench/src/scalar_parallel.rs",
    "chars": 2302,
    "preview": "//! Scalar parallel aobench\n\nuse crate::ambient_occlusion;\nuse crate::geometry::{Ray, V3D};\nuse crate::intersection::{In"
  },
  {
    "path": "examples/aobench/src/scene/mod.rs",
    "chars": 1575,
    "preview": "/// Scene interface\nuse crate::geometry::{f32xN, Plane, Sphere};\n\npub trait Scene: Send + Sync + Default {\n    const NAO"
  },
  {
    "path": "examples/aobench/src/scene/random.rs",
    "chars": 1719,
    "preview": "//! Aobench scene: 3 spheres and a plane using a random number generator\n\nuse crate::geometry::{f32xN, Plane, Sphere, V3"
  },
  {
    "path": "examples/aobench/src/scene/test.rs",
    "chars": 2066,
    "preview": "//! Aobench scene: 3 spheres and a plane using a random number generator\n\nuse crate::geometry::{Plane, Sphere, V3D};\nuse"
  },
  {
    "path": "examples/aobench/src/tiled.rs",
    "chars": 4864,
    "preview": "//! SIMD serial aobench\n\nuse crate::ambient_occlusion;\nuse crate::geometry::{f32xN, pf32xN, usizexN, IncrV, RayxN, V3DxN"
  },
  {
    "path": "examples/aobench/src/tiled_parallel.rs",
    "chars": 3257,
    "preview": "//! SIMD tiled parallel aobench\n\nuse crate::ambient_occlusion;\nuse crate::geometry::{f32xN, pf32xN, usizexN, IncrV, Rayx"
  },
  {
    "path": "examples/aobench/src/vector.rs",
    "chars": 3947,
    "preview": "//! SIMD serial aobench\n\nuse crate::ambient_occlusion;\nuse crate::geometry::{Ray, V3D};\nuse crate::intersection::{Inters"
  },
  {
    "path": "examples/aobench/src/vector_parallel.rs",
    "chars": 2200,
    "preview": "//! SIMD parallel aobench\n\nuse crate::ambient_occlusion;\nuse crate::geometry::{Ray, V3D};\nuse crate::intersection::{Inte"
  },
  {
    "path": "examples/aobench/volta/.gitignore",
    "chars": 15,
    "preview": "ao\n*.ppm\nobjs/\n"
  },
  {
    "path": "examples/aobench/volta/ao.ispc",
    "chars": 8118,
    "preview": "// -*- mode: c++ -*-\n/*\n  Copyright (c) 2010-2011, Intel Corporation\n  All rights reserved.\n\n  Redistribution and use in"
  },
  {
    "path": "examples/dot_product/Cargo.toml",
    "chars": 253,
    "preview": "[package]\nname = \"dot_product\"\nversion = \"0.1.0\"\nauthors = [\"Gonzalo Brito Gadeschi <gonzalobg88@gmail.com>\"]\nedition = "
  },
  {
    "path": "examples/dot_product/readme.md",
    "chars": 21,
    "preview": "# Vector dot product\n"
  },
  {
    "path": "examples/dot_product/src/lib.rs",
    "chars": 724,
    "preview": "//! Vector dot product\n#![deny(rust_2018_idioms)]\n#![feature(custom_inner_attributes)]\n#![allow(clippy::must_use_candida"
  },
  {
    "path": "examples/dot_product/src/scalar.rs",
    "chars": 224,
    "preview": "//! Scalar implementation\n\npub fn dot_prod(a: &[f32], b: &[f32]) -> f32 {\n    assert_eq!(a.len(), b.len());\n    a.iter()"
  },
  {
    "path": "examples/dot_product/src/simd.rs",
    "chars": 426,
    "preview": "//! Scalar implementation\n\nuse packed_simd::f32x4;\n\npub fn dot_prod(a: &[f32], b: &[f32]) -> f32 {\n    assert_eq!(a.len("
  },
  {
    "path": "examples/fannkuch_redux/Cargo.toml",
    "chars": 297,
    "preview": "[package]\nname = \"fannkuch_redux\"\nversion = \"0.1.0\"\nauthors = [\"gnzlbg <gonzalobg88@gmail.com>\"]\nedition = \"2018\"\n\n[depe"
  },
  {
    "path": "examples/fannkuch_redux/readme.md",
    "chars": 1542,
    "preview": "# Fannkuch redux\n\nThis is the [`fannkuch redux` benchmark from the benchmarksgame][bg]. \n\n## Background and description\n"
  },
  {
    "path": "examples/fannkuch_redux/src/fannkuchredux-output.txt",
    "chars": 24,
    "preview": "228\nPfannkuchen(7) = 16\n"
  },
  {
    "path": "examples/fannkuch_redux/src/lib.rs",
    "chars": 581,
    "preview": "//! Fannkuch redux\n#![deny(warnings, rust_2018_idioms)]\n#![allow(non_snake_case, non_camel_case_types)]\n#![allow(\n    cl"
  },
  {
    "path": "examples/fannkuch_redux/src/main.rs",
    "chars": 1611,
    "preview": "#![deny(rust_2018_idioms)]\n\nuse fannkuch_redux_lib::*;\n\nfn run<O: std::io::Write>(o: &mut O, n: usize, alg: usize) {\n   "
  },
  {
    "path": "examples/fannkuch_redux/src/scalar.rs",
    "chars": 3323,
    "preview": "//! Scalar fannkuch redux implementation\n\nuse std::{cmp, mem, thread};\n\n// FIXME: replace with slice rotate\nfn rotate(x:"
  },
  {
    "path": "examples/fannkuch_redux/src/simd.rs",
    "chars": 5497,
    "preview": "//! Vectorized fannkuch redux implementation\n\nuse packed_simd::*;\n\nstruct State {\n    s: [u8; 16],\n    flip_masks: [u8x1"
  },
  {
    "path": "examples/mandelbrot/Cargo.toml",
    "chars": 592,
    "preview": "[package]\nname = \"mandelbrot\"\nversion = \"0.1.0\"\nauthors = [\"gnzlbg <gonzalobg88@gmail.com>\"]\nbuild = \"build.rs\"\nedition "
  },
  {
    "path": "examples/mandelbrot/benchmark.sh",
    "chars": 892,
    "preview": "#!/usr/bin/env bash\n#\n# Runs mandelbrot benchmarks\n\nset -ex\n\nWIDTH=800\nHEIGHT=800\n\nif [[ ${NORUN} != 1 ]]; then\n    hash"
  },
  {
    "path": "examples/mandelbrot/build.rs",
    "chars": 786,
    "preview": "fn main() {\n    println!(\"cargo:rerun-if-changed=build.rs\");\n\n    #[cfg(feature = \"ispc\")]\n    {\n        if std::env::va"
  },
  {
    "path": "examples/mandelbrot/readme.md",
    "chars": 2322,
    "preview": "# Mandelbrot\n\nThis is the [`mandelbrot` benchmark from the benchmarksgame][bg].\n\n## Background\n\nhttp://mathworld.wolfram"
  },
  {
    "path": "examples/mandelbrot/src/ispc_tasks.rs",
    "chars": 672,
    "preview": "//! Includes the ISPC implementations.\nuse crate::*;\nuse ispc::*;\n\nispc_module!(mandelbrot);\n\npub fn generate(dims: Dime"
  },
  {
    "path": "examples/mandelbrot/src/lib.rs",
    "chars": 7846,
    "preview": "//! The mandelbrot benchmark from the [benchmarks game][bg].\n//!\n//! [bg]: https://benchmarksgame-team.pages.debian.net/"
  },
  {
    "path": "examples/mandelbrot/src/main.rs",
    "chars": 1267,
    "preview": "//! The Mandelbrot benchmark from the [benchmarksgame][bg]\n//!\n//! [bg]: https://benchmarksgame-team.pages.debian.net/be"
  },
  {
    "path": "examples/mandelbrot/src/scalar_par.rs",
    "chars": 2653,
    "preview": "//! Scalar mandelbrot implementation\n\nuse crate::*;\n\n/// Complex number\n#[repr(align(16))]\n#[derive(Copy, Clone)]\nstruct"
  },
  {
    "path": "examples/mandelbrot/src/simd_par.rs",
    "chars": 4320,
    "preview": "//! Vectorized parallel Mandelbrot implementation\n#![allow(non_camel_case_types)]\n\nuse crate::*;\nuse packed_simd::*;\n\nty"
  },
  {
    "path": "examples/mandelbrot/volta/mandelbrot.ispc",
    "chars": 3140,
    "preview": "/*\n  Copyright (c) 2010-2012, Intel Corporation\n  All rights reserved.\n\n  Redistribution and use in source and binary fo"
  },
  {
    "path": "examples/matrix_inverse/Cargo.toml",
    "chars": 259,
    "preview": "[package]\nname = \"matrix_inverse\"\nversion = \"0.1.0\"\nauthors = [\"Gonzalo Brito Gadeschi <gonzalobg88@gmail.com>\"]\nedition"
  },
  {
    "path": "examples/matrix_inverse/readme.md",
    "chars": 22,
    "preview": "# 4x4 matrix inverse \n"
  },
  {
    "path": "examples/matrix_inverse/src/lib.rs",
    "chars": 1407,
    "preview": "//! 4x4 matrix inverse\n#![feature(custom_inner_attributes)]\n#![deny(rust_2018_idioms)]\n#![allow(clippy::must_use_candida"
  },
  {
    "path": "examples/matrix_inverse/src/scalar.rs",
    "chars": 5077,
    "preview": "//! Scalar implementation\n#[rustfmt::skip]\nuse crate::*;\n\n#[allow(clippy::too_many_lines)]\npub fn inv4x4(m: Matrix4x4) -"
  },
  {
    "path": "examples/matrix_inverse/src/simd.rs",
    "chars": 3316,
    "preview": "//! 4x4 matrix inverse using SIMD\nuse crate::*;\nuse packed_simd::shuffle;\n\nuse packed_simd::f32x4;\n\npub fn inv4x4(m: Mat"
  },
  {
    "path": "examples/nbody/Cargo.toml",
    "chars": 392,
    "preview": "[package]\nname = \"nbody\"\nversion = \"0.1.0\"\nauthors = [\"Gonzalo Brito Gadeschi <gonzalobg88@gmail.com>\"]\nedition = \"2018\""
  },
  {
    "path": "examples/nbody/benches/algs.rs",
    "chars": 331,
    "preview": "//! n-body benchmarks\n#![feature(test)]\n\nextern crate nbody_lib;\nextern crate test;\n\nuse test::{black_box, Bencher};\n\n#["
  },
  {
    "path": "examples/nbody/readme.md",
    "chars": 710,
    "preview": "# N-Body\n\nThis is the [`n-body` benchmark from the benchmarksgame][bg]. It models the orbits\nof Jovian planets, using th"
  },
  {
    "path": "examples/nbody/src/lib.rs",
    "chars": 615,
    "preview": "//! The N-body benchmark from the [benchmarks game][bg].\n//!\n//! [bg]: https://benchmarksgame-team.pages.debian.net/benc"
  },
  {
    "path": "examples/nbody/src/main.rs",
    "chars": 1815,
    "preview": "//! The N-body benchmark from the [benchmarks game][bg].\n//!\n//! [bg]: https://benchmarksgame-team.pages.debian.net/benc"
  },
  {
    "path": "examples/nbody/src/nbody-output.txt",
    "chars": 26,
    "preview": "-0.169075164\n-0.169087605\n"
  },
  {
    "path": "examples/nbody/src/scalar.rs",
    "chars": 4830,
    "preview": "// The Computer Language Benchmarks Game\n// https://benchmarksgame-team.pages.debian.net\n//\n// contributed by the Rust P"
  },
  {
    "path": "examples/nbody/src/simd.rs",
    "chars": 4518,
    "preview": "#![deny(warnings)]\n\nuse packed_simd::*;\n\nuse std::f64::consts::PI;\nconst SOLAR_MASS: f64 = 4.0 * PI * PI;\nconst DAYS_PER"
  },
  {
    "path": "examples/options_pricing/Cargo.toml",
    "chars": 573,
    "preview": "[package]\nname = \"options_pricing\"\nversion = \"0.1.0\"\nauthors = [\"gnzlbg <gonzalobg88@gmail.com>\"]\nedition = \"2018\"\n\n[dep"
  },
  {
    "path": "examples/options_pricing/benchmark.sh",
    "chars": 1114,
    "preview": "#!/usr/bin/env bash\n#\n# Runs options_pricing benchmarks\n\nset -ex\n\nNUM_OPTIONS_BLACK_SCHOLES=10000000\n\nif [[ ${NORUN} != "
  },
  {
    "path": "examples/options_pricing/build.rs",
    "chars": 950,
    "preview": "fn main() {\n    println!(\"cargo:rerun-if-changed=build.rs\");\n\n    #[cfg(feature = \"ispc\")]\n    {\n        if std::env::va"
  },
  {
    "path": "examples/options_pricing/readme.md",
    "chars": 1490,
    "preview": "# Options Pricing ISPC example\n\nThis is the [`options` ISPC benchmark][ispc]:\n\n> This program implements both the Black-"
  },
  {
    "path": "examples/options_pricing/src/ispc_.rs",
    "chars": 3027,
    "preview": "//! Includes the ISPC implementations.\n\nuse ispc::*;\nispc_module!(options);\n\npub mod black_scholes {\n    use super::*;\n\n"
  },
  {
    "path": "examples/options_pricing/src/lib.rs",
    "chars": 1672,
    "preview": "#![deny(rust_2018_idioms)]\n#![allow(\n    clippy::inline_always,\n    clippy::many_single_char_names,\n    clippy::excessiv"
  },
  {
    "path": "examples/options_pricing/src/main.rs",
    "chars": 2517,
    "preview": "#![deny(warnings, rust_2018_idioms)]\n#![feature(custom_inner_attributes)]\n\nuse options_pricing_lib::*;\n\n#[rustfmt::skip]"
  },
  {
    "path": "examples/options_pricing/src/scalar.rs",
    "chars": 3060,
    "preview": "//! Scalar implementation\n\n// Cumulative normal distribution function\n#[inline(always)]\nfn cnd(x: f32) -> f32 {\n    cons"
  },
  {
    "path": "examples/options_pricing/src/simd.rs",
    "chars": 2336,
    "preview": "//! SIMD implementation\n\nuse crate::f32s;\n\npub fn serial<K>(\n    sa: &[f32], xa: &[f32], ta: &[f32], ra: &[f32], va: &[f"
  },
  {
    "path": "examples/options_pricing/src/simd_kernels.rs",
    "chars": 1581,
    "preview": "use crate::f32s;\n\n// Cumulative normal distribution function\n#[inline(always)]\npub fn cnd(x: f32s) -> f32s {\n    const I"
  },
  {
    "path": "examples/options_pricing/src/simd_par.rs",
    "chars": 2549,
    "preview": "//! SIMD implementation\n\nuse crate::f32s;\n\npub fn parallel<K>(\n    sa: &[f32], xa: &[f32], ta: &[f32], ra: &[f32], va: &"
  },
  {
    "path": "examples/options_pricing/src/sum.rs",
    "chars": 618,
    "preview": "//! Implements different algorithms for summing a slice of `f32`s\n\nuse super::{f32s, f64s};\n\npub fn slice(x: &[f32]) -> "
  },
  {
    "path": "examples/options_pricing/volta/options.ispc",
    "chars": 5834,
    "preview": "// -*- mode: c++ -*-\n/*\n  Copyright (c) 2010-2011, Intel Corporation\n  All rights reserved.\n\n  Redistribution and use in"
  },
  {
    "path": "examples/options_pricing/volta/options_defs.h",
    "chars": 1681,
    "preview": "/*\n  Copyright (c) 2010-2011, Intel Corporation\n  All rights reserved.\n\n  Redistribution and use in source and binary fo"
  },
  {
    "path": "examples/rust-toolchain",
    "chars": 7,
    "preview": "nightly"
  },
  {
    "path": "examples/slice_sum/Cargo.toml",
    "chars": 276,
    "preview": "[package]\nname = \"slice_sum\"\nversion = \"0.1.0\"\nauthors = [\"gnzlbg <gonzalobg88@gmail.com>\"]\nedition = \"2018\"\n\n[[bin]]\nna"
  },
  {
    "path": "examples/slice_sum/readme.md",
    "chars": 494,
    "preview": "# Computes the sum of a slice of floating-point numbers\n\nThis example show-cases the performance difference of computing"
  },
  {
    "path": "examples/slice_sum/src/main.rs",
    "chars": 2504,
    "preview": "#![deny(rust_2018_idioms)]\n\nuse packed_simd::f32x8 as f32s;\nuse std::{mem, slice};\n\nfn init(n: usize) -> Vec<f32> {\n    "
  },
  {
    "path": "examples/spectral_norm/Cargo.toml",
    "chars": 294,
    "preview": "[package]\nname = \"spectral_norm\"\nversion = \"0.1.0\"\nauthors = [\"gnzlbg <gonzalobg88@gmail.com>\"]\nedition = \"2018\"\n\n[depen"
  },
  {
    "path": "examples/spectral_norm/readme.md",
    "chars": 1007,
    "preview": "# Spectral norm\n\nThis is the [`spectral-norm` benchmark from the benchmarksgame][bg]. \n\n## Background and description\n\nM"
  },
  {
    "path": "examples/spectral_norm/src/lib.rs",
    "chars": 493,
    "preview": "//! Spectral Norm\n#![deny(rust_2018_idioms)]\n#![allow(non_snake_case, non_camel_case_types)]\n#![allow(\n    clippy::cast_"
  },
  {
    "path": "examples/spectral_norm/src/main.rs",
    "chars": 1496,
    "preview": "extern crate spectral_norm_lib;\nuse spectral_norm_lib::*;\n\nfn run<O: std::io::Write>(o: &mut O, n: usize, alg: usize) {\n"
  },
  {
    "path": "examples/spectral_norm/src/scalar.rs",
    "chars": 1709,
    "preview": "//! Scalar spectral norm implementation\n\nuse crate::*;\nuse std::{\n    iter::*,\n    ops::{Add, Div},\n};\n\nstruct f64x2(f64"
  },
  {
    "path": "examples/spectral_norm/src/simd.rs",
    "chars": 1675,
    "preview": "//! Vectorized spectral norm implementation\n\nuse crate::*;\nuse packed_simd::*;\n\nfn mult_Av(v: &[f64], out: &mut [f64]) {"
  },
  {
    "path": "examples/spectral_norm/src/spectralnorm-output.txt",
    "chars": 12,
    "preview": "1.274219991\n"
  },
  {
    "path": "examples/stencil/Cargo.toml",
    "chars": 522,
    "preview": "[package]\nname = \"stencil\"\nversion = \"0.1.0\"\nauthors = [\"gnzlbg <gonzalobg88@gmail.com>\"]\nedition = \"2018\"\n\n[dependencie"
  },
  {
    "path": "examples/stencil/benchmark.sh",
    "chars": 790,
    "preview": "#!/usr/bin/env bash\n#\n# Runs aobench benchmarks\n\nset -ex\n\nif [[ ${NORUN} != 1 ]]; then\n    hash hyperfine 2>/dev/null ||"
  },
  {
    "path": "examples/stencil/build.rs",
    "chars": 780,
    "preview": "fn main() {\n    println!(\"cargo:rerun-if-changed=build.rs\");\n\n    #[cfg(feature = \"ispc\")]\n    {\n        if std::env::va"
  },
  {
    "path": "examples/stencil/readme.md",
    "chars": 1912,
    "preview": "# Stencil\n\nThis is the generic [`stencil` ISPC benchmark][ispc]. \n\n## Usage\n\n```\ncargo run --release --features=ispc\n```"
  },
  {
    "path": "examples/stencil/src/ispc_loops.rs",
    "chars": 1229,
    "preview": "//! Includes the ISPC implementations.\n\nuse ispc::*;\nispc_module!(stencil);\n\npub fn serial(\n    t0: i32, t1: i32, x0: i3"
  },
  {
    "path": "examples/stencil/src/lib.rs",
    "chars": 4487,
    "preview": "#![feature(custom_inner_attributes, stmt_expr_attributes)]\n// FIXME: Null pointer deref warning triggered in this exampl"
  },
  {
    "path": "examples/stencil/src/main.rs",
    "chars": 1355,
    "preview": "#![feature(custom_inner_attributes)]\n\nuse stencil_lib::*;\n\nuse std::env;\n\n#[rustfmt::skip]\nfn run<F>(name: &str, f: F)\nw"
  },
  {
    "path": "examples/stencil/src/scalar.rs",
    "chars": 2320,
    "preview": "//! Scalar implementation\n\npub fn step(\n    x0: i32, x1: i32, y0: i32, y1: i32, z0: i32, z1: i32, n_x: i32, n_y: i32,\n  "
  },
  {
    "path": "examples/stencil/src/simd.rs",
    "chars": 7943,
    "preview": "//! SIMD implementation\n\nuse packed_simd::*;\n\n#[inline(always)]\npub(crate) fn step_x8(\n    x0: i32, x1: i32, y0: i32, y1"
  },
  {
    "path": "examples/stencil/src/simd_par.rs",
    "chars": 5333,
    "preview": "//! SIMD+Rayon implementation.\nuse crate::simd::step_x8;\nuse rayon::prelude::*;\n\n#[inline(always)]\nfn x8_par_impl(\n    t"
  },
  {
    "path": "examples/stencil/volta/.gitignore",
    "chars": 38,
    "preview": "# Files built by ISPC\n/objs/\n/stencil\n"
  },
  {
    "path": "examples/stencil/volta/Makefile",
    "chars": 213,
    "preview": "\nEXAMPLE=stencil\nCPP_SRC=stencil.cpp stencil_serial.cpp\nISPC_SRC=stencil.ispc\nISPC_IA_TARGETS=sse2-i32x4,sse4-i32x4,avx1"
  },
  {
    "path": "examples/stencil/volta/common.mk",
    "chars": 3833,
    "preview": "\nTASK_CXX=tasksys.cpp\nTASK_LIB=-lpthread\nTASK_OBJ=objs/tasksys.o\n\nCXX=clang++\nCXXFLAGS+=-Iobjs/ -O3 -march=native\nCC=cla"
  },
  {
    "path": "examples/stencil/volta/stencil.cpp",
    "chars": 6373,
    "preview": "/*\n  Copyright (c) 2010-2014, Intel Corporation\n  All rights reserved.\n  Redistribution and use in source and binary for"
  },
  {
    "path": "examples/stencil/volta/stencil.ispc",
    "chars": 5240,
    "preview": "/*\n  Copyright (c) 2010-2011, Intel Corporation\n  All rights reserved.\n\n  Redistribution and use in source and binary fo"
  },
  {
    "path": "examples/stencil/volta/stencil_serial.cpp",
    "chars": 3714,
    "preview": "/*\n  Copyright (c) 2010-2011, Intel Corporation\n  All rights reserved.\n\n  Redistribution and use in source and binary fo"
  },
  {
    "path": "examples/stencil/volta/tasksys.cpp",
    "chars": 42325,
    "preview": "/*\n  Copyright (c) 2011-2012, Intel Corporation\n  All rights reserved.\n\n  Redistribution and use in source and binary fo"
  },
  {
    "path": "examples/stencil/volta/timing.h",
    "chars": 3860,
    "preview": "/*\n  Copyright (c) 2010-2011, Intel Corporation\n  All rights reserved.\n\n  Redistribution and use in source and binary fo"
  },
  {
    "path": "examples/triangle_xform/Cargo.toml",
    "chars": 255,
    "preview": "[package]\nname = \"triangle_xform\"\nversion = \"0.1.0\"\nauthors = [\"Gonzalo Brito Gadeschi <gonzalobg88@gmail.com>\"]\nedition"
  },
  {
    "path": "examples/triangle_xform/readme.md",
    "chars": 2214,
    "preview": "# Transforming triangle vertices using a transformation matrix\n\n## Description\n\nThis example contains the SIMD implement"
  },
  {
    "path": "examples/triangle_xform/src/lib.rs",
    "chars": 2916,
    "preview": "#![allow(clippy::must_use_candidate)]\n\n/// Simple matrix type.\n/// The memory layout is the same as the one for Direct3D"
  },
  {
    "path": "examples/triangle_xform/src/scalar.rs",
    "chars": 1770,
    "preview": "use super::Matrix;\n\n/// Vertex data: a single 3D vector of floats, representing position.\npub type Vertex = [f32; 3];\n\n/"
  },
  {
    "path": "examples/triangle_xform/src/simd.rs",
    "chars": 2369,
    "preview": "use super::Matrix;\n\n/// SIMD vector of floats\npub type VecF = packed_simd::f32x8;\n\n/// SIMD batch of N triangles, where "
  },
  {
    "path": "micro_benchmarks/Cargo.toml",
    "chars": 397,
    "preview": "[package]\nname = \"micro_benchmarks\"\nversion = \"0.1.0\"\nauthors = [\"gnzlbg <gonzalobg88@gmail.com>\"]\nautobenches = false\ne"
  },
  {
    "path": "micro_benchmarks/benches/mask_reductions.rs",
    "chars": 2503,
    "preview": "//! Benchmarks for the mask reductions `all`, `any`, and `none`.\n#![deny(rust_2018_idioms)]\n#![feature(test)]\n\nuse packe"
  },
  {
    "path": "micro_benchmarks/rust-toolchain",
    "chars": 7,
    "preview": "nightly"
  },
  {
    "path": "perf-guide/.gitignore",
    "chars": 6,
    "preview": "/book\n"
  },
  {
    "path": "perf-guide/book.toml",
    "chars": 296,
    "preview": "[book]\nauthors = [\"Gonzalo Brito Gadeschi\", \"Gabriel Majeri\"]\nmultilingual = false\nsrc = \"src\"\ntitle = \"Rust SIMD Perfor"
  },
  {
    "path": "perf-guide/src/SUMMARY.md",
    "chars": 794,
    "preview": "# Summary\n\n[Introduction](./introduction.md)\n\n- [Floating-point Math](./float-math/fp.md)\n  - [Short-vector Math Library"
  },
  {
    "path": "perf-guide/src/ascii.css",
    "chars": 134,
    "preview": "code {\n    /* \"Source Code Pro\" breaks ASCII art */\n    font-family: Consolas, \"Ubuntu Mono\", Menlo, \"DejaVu Sans Mono\","
  },
  {
    "path": "perf-guide/src/bound_checks.md",
    "chars": 911,
    "preview": "# Bounds checking\n\nReading and writing packed vectors to/from slices is checked by default.\nIndependently of the configu"
  },
  {
    "path": "perf-guide/src/float-math/approx.md",
    "chars": 265,
    "preview": "# Approximate functions\n\n<!-- TODO:\n\nExplain that they exists, that they are often _much_ faster, how to use them,\nthat "
  },
  {
    "path": "perf-guide/src/float-math/fma.md",
    "chars": 186,
    "preview": "# Fused Multiply Add\n\n<!-- TODO:\nExplain that this is a compound operation, infinite precision, difference\nbetween `mul_"
  },
  {
    "path": "perf-guide/src/float-math/fp.md",
    "chars": 108,
    "preview": "# Floating-point math\n\nThis chapter contains information pertaining to working with floating-point numbers.\n"
  },
  {
    "path": "perf-guide/src/float-math/svml.md",
    "chars": 165,
    "preview": "# Short Vector Math Library\n\n<!-- TODO:\nExplain how is short-vector math performed by default (just scalarized libm call"
  },
  {
    "path": "perf-guide/src/introduction.md",
    "chars": 638,
    "preview": "# Introduction\n\n## What is SIMD\n\n<!-- TODO:\ndescribe what SIMD is, which algorithms can benefit from it,\ngive usage exam"
  },
  {
    "path": "perf-guide/src/prof/linux.md",
    "chars": 3464,
    "preview": "# Performance profiling on Linux\n\n## Using `perf`\n\n[perf](https://perf.wiki.kernel.org/) is the most powerful performanc"
  },
  {
    "path": "perf-guide/src/prof/mca.md",
    "chars": 3624,
    "preview": "# Machine code analysis tools\n\n## The microarchitecture of modern CPUs\n\nWhile you might have heard of Instruction Set Ar"
  },
  {
    "path": "perf-guide/src/prof/profiling.md",
    "chars": 738,
    "preview": "# Performance profiling\n\nWhile the rest of the book provides practical advice on how to improve the performance\nof SIMD "
  },
  {
    "path": "perf-guide/src/target-feature/attribute.md",
    "chars": 91,
    "preview": "# The `target_feature` attribute\n\n<!-- TODO:\nExplain the `#[target_feature]` attribute\n-->\n"
  },
  {
    "path": "perf-guide/src/target-feature/features.md",
    "chars": 745,
    "preview": "# Enabling target features\n\nNot all processors of a certain architecture will have SIMD processing units,\nand using a SI"
  },
  {
    "path": "perf-guide/src/target-feature/inlining.md",
    "chars": 97,
    "preview": "# Inlining\n\n<!-- TODO:\nExplain how the `#[target_feature]` attribute interacts with inlining\n-->\n"
  },
  {
    "path": "perf-guide/src/target-feature/practice.md",
    "chars": 1486,
    "preview": "# Target features in practice\n\nUsing `RUSTFLAGS` will allow the crate being compiled, as well as all its\ntransitive depe"
  },
  {
    "path": "perf-guide/src/target-feature/runtime.md",
    "chars": 82,
    "preview": "# Detecting host features at runtime\n\n<!-- TODO:\nExplain cost (how it works).\n-->\n"
  },
  {
    "path": "perf-guide/src/target-feature/rustflags.md",
    "chars": 2625,
    "preview": "# Using RUSTFLAGS\n\nOne of the easiest ways to benefit from SIMD is to allow the compiler\nto generate code using certain "
  },
  {
    "path": "perf-guide/src/vert-hor-ops.md",
    "chars": 2631,
    "preview": "# Vertical and horizontal operations\n\nIn SIMD terminology, each vector has a certain \"width\" (number of lanes).\nA vector"
  }
]

// ... and 163 more files (download for full content)

About this extraction

This page contains the full source code of the rust-lang-nursery/packed_simd GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 363 files (939.2 KB), approximately 281.6k tokens, and a symbol index with 747 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.

Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.

Extract another repo